├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .travis.yml ├── AUTHORS ├── Cargo.toml ├── LICENSE ├── README.md ├── analyze ├── __init__.py ├── ethplot.mplstyle ├── profile │ ├── __init__.py │ ├── compare_timeseries.py │ ├── correlation.py │ ├── event_detail.py │ ├── stats.py │ └── timeseries.py └── util.py ├── doc ├── correlation_heatmap.png ├── counters_vs_events.png ├── intro.svg ├── manual.md ├── perf_event_plot.png ├── results.csv └── timeseries.csv ├── src ├── aggregate.rs ├── cmd.yml ├── counters.toml ├── main.rs ├── mkgroup.rs ├── pair.rs ├── profile.rs ├── scale.rs ├── search.rs ├── stats.rs └── util.rs └── tests ├── pair └── manifest.toml └── test_readme.sh /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is and what platform you are running on 12 | and of what you expected to happen. 13 | 14 | **To Reproduce** 15 | Steps to reproduce the behavior: 16 | 1. Run autoperf with these arguments (add -vvv for trace debug output). 17 | 2. Invoke script(s) ... 18 | 4. See error 19 | 20 | **Machine (please complete the following information):** 21 | - Linux version: [use uname -a] 22 | - Machine: [cpuid output] 23 | - perf version: [perf --version] 24 | - autoperf version: [autoperf --version] 25 | - output of: ls /sys/bus/event_source/devices/ 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | *.pyc 4 | .sync-config.cson 5 | __pycache__ 6 | .ipynb_checkpoints 7 | .vscode 8 | .DS_Store -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | services: docker 3 | sudo: required 4 | language: rust 5 | rust: 6 | - nightly 7 | 8 | script: 9 | - bash tests/test_readme.sh 10 | 11 | notifications: 12 | email: 13 | on_success: never 14 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Denny Lin 2 | Besmira Nushi 3 | Gerd Zellweger 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "autoperf" 3 | description = "Automate the recording and instrumentation of programs with performance counters." 4 | version = "0.9.5" 5 | authors = [ 6 | "Denny Lin ", 7 | "Besmira Nushi ", 8 | "Gerd Zellweger " 9 | ] 10 | edition = '2018' 11 | license = "MIT" 12 | repository = "https://github.com/gz/autoperf" 13 | documentation = "https://docs.rs/autoperf" 14 | 15 | [dependencies] 16 | pbr = "1.0.1" 17 | log = "0.4" 18 | env_logger = "0.6" 19 | csv = "0.*" 20 | lazy_static = "0.1.*" 21 | perfcnt = "0.7" 22 | toml = "0.2" 23 | nom = "^1.2.3" 24 | libc = "0.2.16" 25 | clap = { version = "2", features = ["yaml"] } 26 | x86 = { version = "0.45", features = ["performance-counter"] } 27 | phf = "0.9.*" 28 | itertools = "0.5" 29 | rustc-serialize = "0.3" 30 | wait-timeout = "0.1" 31 | 32 | [profile.release] 33 | debug = true 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Gerd Zellweger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/gz/autoperf.svg)](https://travis-ci.org/gz/autoperf) 2 | [![Crates.io](https://img.shields.io/crates/v/autoperf.svg)](https://crates.io/crates/autoperf) 3 | [![docs.rs/autoperf](https://docs.rs/autoperf/badge.svg)](https://docs.rs/crate/autoperf/) 4 | 5 | 6 | # autoperf 7 | 8 | autoperf simplifies the instrumentation of programs with performance 9 | counters on Intel machines. Rather than trying to learn how to measure every 10 | event and manually programming event values in counter registers or perf, you 11 | can use autoperf which will repeatedly run your program until it has measured 12 | every single performance event on your machine. autoperf tries to compute a 13 | schedule that maximizes the amount of events measured per run, and 14 | minimizes the total number of runs while avoiding multiplexing of events on 15 | counters. 16 | 17 |

18 | ![autoperf profiling demo](/doc/intro.svg) 19 |
20 | ![Number of available performance events vs. hardware counters across Intel micro-architectures](/doc/counters_vs_events.png) 21 |
22 | 23 | 24 | ## Background 25 | 26 | Performance monitoring units typically distinguish between performance events and counters. 27 | Events refer to observations on the micro-architectural level 28 | (e.g., a TLB miss, a page-walk etc.), whereas counters are hardware registers that 29 | count the occurrence of events. The figure on the right shows the number of different 30 | observable events for different Intel micro-architectures. Note that current systems 31 | provide a very large choice of possible events to monitor. The number of measurable 32 | counters per PMU is limited (typically from two to eight). For example, if the same 33 | events are measured on all PMUs on a SkylakeX (Xeon Gold 5120) machine, we can only 34 | observe a maximum of 48 different events (without sampling). autoperf simplifies the process 35 | of fully measuring and recording every performance event for a given program. 36 | In our screen session above, recorded on a SkylakeX machine with ~3500 distinct events, 37 | we can see how autoperf automatically runs a program 1357 times while measuring and recording 38 | a different set of events in every run. 39 |
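
As a rough back-of-the-envelope (this is only an illustration, not the actual schedule autoperf computes), dividing the number of events by the number of counters that can be used simultaneously gives a lower bound on the required runs; compatibility constraints between events push the real number higher:

```
# Illustrative lower bound only -- not autoperf's real scheduling algorithm.
import math

events_on_machine = 3500  # e.g., the SkylakeX machine from the recording above
events_per_run = 48       # max. events measurable at once without multiplexing

print(math.ceil(events_on_machine / events_per_run))
# => at least 73 runs; grouping constraints (fixed counters, per-unit limits,
#    incompatible events) raise this to the 1357 runs seen above.
```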
40 | 41 | # Installation 42 | 43 | autoperf is known to work with Ubuntu 18.04 on Skylake and 44 | IvyBridge/SandyBridge architectures. All Intel architectures should work, 45 | please file a bug request if it doesn't. autoperf builds on `perf` from the 46 | Linux project and a few other libraries that can be installed using: 47 | 48 | ``` 49 | $ sudo apt-get update 50 | $ sudo apt-get install likwid cpuid hwloc numactl util-linux 51 | ``` 52 | 53 | To run the example analysis scripts, you'll need these python3 libraries: 54 | ``` 55 | $ pip3 install ascii_graph matplotlib pandas argparse numpy 56 | ``` 57 | 58 | You'll also need the *nightly version* of the rust compiler which is 59 | best installed using rustup: 60 | ``` 61 | $ curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly 62 | $ source $HOME/.cargo/env 63 | ``` 64 | 65 | autoperf is published on crates.io, so once you have rust and cargo installed, 66 | you can get it directly from there: 67 | ``` 68 | $ cargo +nightly install autoperf 69 | ``` 70 | 71 | Or alternatively, clone and build the repository yourself: 72 | ``` 73 | $ git clone https://github.com/gz/autoperf.git 74 | $ cd autoperf 75 | $ cargo build --release 76 | $ ./target/release/autoperf --help 77 | ``` 78 | 79 | autoperf uses perf internally to interface with Linux and the performance 80 | counter hardware. perf recommends that the following settings are disabled. 81 | Therefore, autoperf will check the values of those configurations and refuse to 82 | start if they are not set like below: 83 | ``` 84 | sudo sh -c 'echo 0 >> /proc/sys/kernel/kptr_restrict' 85 | sudo sh -c 'echo 0 > /proc/sys/kernel/nmi_watchdog' 86 | sudo sh -c 'echo -1 > /proc/sys/kernel/perf_event_paranoid' 87 | ``` 88 | 89 | # Usage 90 | 91 | autoperf has a few commands, use `--help` to get a better overview of all the 92 | options. 93 | 94 | ## Profiling 95 | 96 | The **profile** command instruments a single program by running it multiple times 97 | until every performance event is measured. For example, 98 | ``` 99 | $ autoperf profile sleep 2 100 | ``` 101 | will repeatedly run `sleep 2` while measuring different performance events 102 | with performance counters every time. Once completed, you will find an `out` 103 | folder with many csv files that contain measurements from individual runs. 104 | 105 | ## Aggregating results 106 | 107 | To combine all those runs into a single CSV result file you can use the 108 | **aggregate** command: 109 | ``` 110 | $ autoperf aggregate ./out 111 | ``` 112 | This will do some sanity checking and produce a `results.csv` 113 | ([reduced example](../master/doc/results.csv)) file which contains 114 | all the measured data. 115 | 116 | ## Analyze results 117 | 118 | Performance events are measured individually on every core (and other 119 | monitoring units). The `timeseries.py` can aggregate events by taking the 120 | average, stddef, min, max etc. and producing a time-series matrix ([see a 121 | reduced example](../master/doc/timeseries.csv)). 122 | 123 | ``` 124 | python3 analyze/profile/timeseries.py ./out 125 | ``` 126 | 127 | Now you have all the data, so you can start asking some questions. 
As an 128 | example, the following script tells you how events were correlated 129 | when your program was running: 130 | 131 | ``` 132 | $ python3 analyze/profile/correlation.py ./out 133 | $ open out/correlation_heatmap.png 134 | ``` 135 | 136 | Event correlation for the `autoperf profile sleep 2` command 137 | above looks like this (every dot represents the correlation of the time series 138 | between two measured performance events; the data is from a Skylake machine with 139 | around 1700 non-zero event measurements): 140 | ![Correlation Heatmap](/doc/correlation_heatmap.png) 141 | 142 | You can look at individual events too: 143 | ``` 144 | python3 analyze/profile/event_detail.py --resultdir ./out --features AVG.OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD 145 | ``` 146 | ![Plot events](/doc/perf_event_plot.png) 147 | 148 | There are more scripts in the `analyze` folder for working with the captured 149 | data sets. Have a look. 150 | 151 | ## What do I use this for? 152 | 153 | autoperf allows you to quickly gather lots of performance (or training) data and 154 | reason about it quantitatively. For example, we initially developed autoperf to 155 | build ML classifiers that the Barrelfish scheduler could use to detect 156 | application slowdown and make better scheduling decisions. autoperf can gather 157 | that data to generate such classifiers without requiring domain knowledge about 158 | events, aside from how to measure them. 159 | 160 | You can read more about our experiments here: 161 | 162 | * https://dl.acm.org/citation.cfm?id=2967360.2967375 163 | * https://www.research-collection.ethz.ch/handle/20.500.11850/155854 164 | 165 | Last but not least, autoperf can potentially be useful in many other scenarios: 166 | * Find out what performance events are relevant for your workload 167 | * Analyze and find performance issues in your code or across different versions of your code 168 | * Generate classifiers to detect hardware exploits (side channels/spectre/meltdown etc.) 169 | * ...
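
## Working with the raw data

The scripts in `analyze/` are only one way to consume the measurements. The `results.csv` file written by `autoperf aggregate` is a plain CSV with the columns `EVENT_NAME, INDEX, TIME, SOCKET, CORE, CPU, NODE, UNIT, SAMPLE_VALUE`, so it is easy to load into your own tooling. A minimal pandas sketch (the paths assume the `./out` directory from the examples above):

```
import pandas as pd

# Aggregated measurements produced by `autoperf aggregate ./out`:
df = pd.read_csv("out/results.csv", skipinitialspace=True)

# Total event count per event name, summed over all cores, sockets and time steps:
totals = df.groupby("EVENT_NAME")["SAMPLE_VALUE"].sum().sort_values(ascending=False)
print(totals.head(20))

# Or use the time-series matrix generated by analyze/profile/timeseries.py:
ts = pd.read_csv("out/timeseries.csv", index_col=0, skipinitialspace=True)
print(ts.filter(like="AVG.").describe())
```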
170 | -------------------------------------------------------------------------------- /analyze/__init__.py: -------------------------------------------------------------------------------- 1 | __name__ = 'analyze' 2 | -------------------------------------------------------------------------------- /analyze/ethplot.mplstyle: -------------------------------------------------------------------------------- 1 | font.size: 18.0 2 | font.family: sans-serif 3 | font.sans-serif: Supria Sans, Lucida Grande, Bitstream Vera Sans, Helvetica Neue LT Pro 4 | font.style: normal 5 | font.variant: normal 6 | 7 | lines.linewidth: 4 8 | lines.solid_capstyle: butt 9 | 10 | # Don't really want a frame but in case we do we want the fancy one: 11 | legend.frameon: false 12 | legend.fancybox: true 13 | 14 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 15 | axes.facecolor: ffffff 16 | axes.edgecolor: cbcbcb 17 | axes.labelsize: large 18 | axes.axisbelow: true 19 | axes.grid: true 20 | axes.linewidth: 2.0 21 | axes.titlesize: x-large 22 | axes.labelweight: light 23 | 24 | # Remove small ticks at the labels, not necessary with grid: 25 | xtick.major.size: 0 26 | ytick.major.size: 0 27 | xtick.minor.size: 0 28 | ytick.minor.size: 0 29 | 30 | # Adds more space between x[0] and y[0] tick labels: 31 | xtick.major.pad: 7 32 | ytick.major.pad: 7 33 | 34 | patch.edgecolor: f0f0f0 35 | patch.linewidth: 0.5 36 | 37 | svg.fonttype: path 38 | 39 | grid.linestyle: - 40 | grid.linewidth: 1.0 41 | grid.color: cbcbcb 42 | 43 | savefig.edgecolor: f0f0f0 44 | savefig.facecolor: ffffff 45 | savefig.dpi: 300 46 | savefig.bbox: tight 47 | savefig.pad_inches: 0.05 48 | 49 | figure.subplot.left: 0.00 50 | figure.subplot.right: 1.0 51 | figure.subplot.bottom: 0.00 52 | figure.subplot.top: 0.9 53 | #figure.subplot.wspace : 0.2 # the amount of width reserved for blank space between subplots 54 | #figure.subplot.hspace : 0.2 # the amount of height reserved for white space between subplots 55 | -------------------------------------------------------------------------------- /analyze/profile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/analyze/profile/__init__.py -------------------------------------------------------------------------------- /analyze/profile/compare_timeseries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Compares two timeseries and looks for differences. 4 | 5 | It does that by summing up the maximas observed for every event on a given 6 | slice of the time-series. Which leads to a single value (magnitude) per 7 | observed event. 8 | Then given two timeseries A and B we compare them by calculating a normalized 9 | factor A.event / (A.event + B.event) to find values that predominantly trigger 10 | only in A and B.event / (A.event + B.event) to find values that trigger 11 | predominantly in B. 
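
Usage:
    python3 analyze/profile/compare_timeseries.py <data_input_dir_A> <data_input_dir_B>

Both directories need a results.csv (as produced by `autoperf aggregate`); a
timeseries_avg_nonzero.csv cache is written into each directory on first use.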
12 | """ 13 | 14 | import os 15 | import sys 16 | import pandas as pd 17 | import numpy as np 18 | import matplotlib 19 | 20 | 21 | def timeseries_file(data_directory): 22 | timeseries_file = os.path.join(data_directory, 'timeseries_avg_nonzero.csv') 23 | if os.path.exists(timeseries_file): 24 | return pd.read_csv(timeseries_file, index_col=0, skipinitialspace=True) 25 | else: 26 | print("Generating timeseries_avg_nonzero.csv") 27 | timeseries = util.load_as_X(os.path.join(data_directory, 'results.csv'), 28 | aggregate_samples=['mean'], cut_off_nan=True, remove_zero=True) 29 | timeseries.to_csv(timeseries_file) 30 | return timeseries 31 | 32 | def usage(progname): 33 | print('usage:', progname, '[data_input_dir for A] [data_input_dir for B]') 34 | sys.exit(0) 35 | 36 | if __name__ == '__main__': 37 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 38 | from analyze import util 39 | 40 | if len(sys.argv) > 3: 41 | usage(sys.argv[0]) 42 | 43 | dfA = timeseries_file(sys.argv[1]) 44 | dfA = dfA[-15:].sum() # TODO range is hard-coded, adjust 45 | 46 | dfB = timeseries_file(sys.argv[2]) 47 | dfB = dfB[-15:].sum() # TODO range is hard-coded, adjust 48 | 49 | max_among_both = pd.concat([dfA, dfB]).max(level=0) 50 | 51 | normA = (dfA / (dfA + dfB)).dropna() 52 | normB = (dfB / (dfA + dfB)).dropna() 53 | 54 | fmt_string = "{event}: {fraction:.2f} ({absolute1}-{absolute2}={res})" 55 | 56 | 57 | print("Events that predominantly trigger in {} and not in {}\n".format(sys.argv[2], sys.argv[1])) 58 | print("Event name: Fraction (progB - progA = difference)") 59 | print("=================================================") 60 | for (name, val) in normB.sort_values().iteritems(): 61 | if val > 0.95: 62 | print (fmt_string.format(event=name, fraction=val, absolute1=dfB[name], absolute2=dfA[name], res=dfB[name]-dfA[name])) 63 | 64 | print("") 65 | print("") 66 | 67 | print("Events that predominantly trigger in {} and not in {}\n".format(sys.argv[1], sys.argv[2])) 68 | 69 | print("Event name: Fraction (progA - progB = difference)") 70 | print("=================================================") 71 | for (name, val) in normA.sort_values().iteritems(): 72 | if val > 0.95: 73 | print (fmt_string.format(event=name, fraction=val, absolute1=dfA[name], absolute2=dfB[name], res=dfA[name]-dfB[name])) 74 | 75 | -------------------------------------------------------------------------------- /analyze/profile/correlation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Compute the pairwise correlation for all events in results.csv 4 | and stores it in correlation_matrix.csv. 5 | Also generates a heatmap for the computed matrix 6 | and stores it in correlation_heatmap.csv. 
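
Usage:
    python3 analyze/profile/correlation.py <data_input_dir>

Reads <data_input_dir>/results.csv and writes correlation_matrix.csv and
correlation_heatmap.png into the same directory.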
7 | """ 8 | 9 | import os 10 | import sys 11 | import pandas as pd 12 | import numpy as np 13 | import matplotlib 14 | 15 | matplotlib.use('Agg') 16 | from matplotlib import pyplot as plt, font_manager 17 | from matplotlib.colors import LinearSegmentedColormap 18 | 19 | colors = LinearSegmentedColormap.from_list('seismic', 20 | ['#ca0020', '#ffffff', '#2a99d6']) 21 | 22 | if __name__ == "__main__": 23 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 24 | from analyze import util 25 | 26 | def correlation_matrix(data_directory): 27 | df = util.load_as_X(os.path.join(data_directory, 'results.csv'), cut_off_nan=True, remove_zero=True) 28 | correlation_matrix = df.corr() 29 | # Ensure all values in correlation matrix are valid 30 | assert not correlation_matrix.isnull().values.any() 31 | 32 | correlation_file = os.path.join(data_directory, 'correlation_matrix.csv') 33 | correlation_matrix.to_csv(correlation_file) 34 | print("Generated correlation_matrix.csv") 35 | 36 | def correlation_heatmap(data_directory): 37 | data_file = os.path.join(data_directory, 'correlation_matrix.csv') 38 | data = pd.read_csv(data_file, header=0, index_col=0) 39 | def make_heatmap(plot_output_dir, data): 40 | plt.style.use([os.path.join(sys.path[0], "..", 'ethplot.mplstyle')]) 41 | fig, ax = plt.subplots() 42 | 43 | ax.xaxis.set_visible(False) 44 | ax.yaxis.set_visible(False) 45 | plt.xlim(0, data.shape[0]) 46 | plt.ylim(0, data.shape[1]) 47 | 48 | c = plt.pcolor(data.iloc[::-1], cmap=colors, vmin=-1.0, vmax=1.0) 49 | colorbar = plt.colorbar(c, ticks=[-1, 0, 1]) 50 | 51 | #ticks_font = font_manager.FontProperties(family='Decima Mono') 52 | #plt.setp(colorbar.ax.get_yticklabels(), fontproperties=ticks_font) 53 | plt.savefig(os.path.join(plot_output_dir, 'correlation_heatmap.png'), format='png') 54 | print("Generated correlation_heatmap.png") 55 | 56 | make_heatmap(data_directory, data) 57 | 58 | def usage(progname): 59 | print('usage:', progname, '[data_input_dir]') 60 | sys.exit(0) 61 | 62 | if __name__ == '__main__': 63 | if len(sys.argv) > 2: 64 | usage(sys.argv[0]) 65 | correlation_matrix(sys.argv[1]) 66 | correlation_heatmap(sys.argv[1]) 67 | 68 | -------------------------------------------------------------------------------- /analyze/profile/event_detail.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Display information about a single event. 
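
Usage:
    python3 analyze/profile/event_detail.py --resultdir <data_input_dir> \
        --features AVG.<EVENT_NAME> [MAX.<EVENT_NAME> ...]

Prefix each event name with 'AVG.', 'STD.', 'MAX.' or 'MIN.' to pick the
aggregate to plot; the resulting PNG is saved in the result directory.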
5 | """ 6 | 7 | import sys 8 | import os 9 | import re 10 | import argparse 11 | import pandas as pd 12 | import numpy as np 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | 16 | from matplotlib import pyplot as plt, font_manager 17 | plt.style.use([os.path.join(sys.path[0], '..', 'ethplot.mplstyle')]) 18 | 19 | if __name__ == "__main__": 20 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 21 | from analyze import util 22 | 23 | def plot_events(df, features, filename, output_dir, title=None): 24 | fig = plt.figure() 25 | if title: 26 | fig.suptitle(title) 27 | 28 | ax1 = fig.add_subplot(1, 1, 1) 29 | ax1.set_xlabel('Time [s]') 30 | ax1.set_ylabel('Events observed [count]') 31 | ax1.spines['top'].set_visible(False) 32 | ax1.spines['right'].set_visible(False) 33 | ax1.get_xaxis().tick_bottom() 34 | ax1.get_yaxis().tick_left() 35 | 36 | for feature in features: 37 | ax1.plot(df[feature], label=feature) 38 | 39 | ax1.xaxis.set_ticks(np.arange(0, len(df), 4)) 40 | 41 | val, labels = plt.xticks() 42 | plt.xticks(val, ["{}".format(x / 4) for x in val]) 43 | 44 | ax1.set_ylim(ymin=0.0) 45 | ax1.legend(loc='best', prop={'size': 8}) 46 | 47 | plt.savefig(os.path.join(output_dir, filename + ".png"), format='png') 48 | plt.clf() 49 | plt.close() 50 | print("Generated file {}".format(filename + ".png")) 51 | 52 | 53 | def make_plot(from_directory, features): 54 | df = util.load_as_X(os.path.join(from_directory, 'results.csv'), aggregate_samples = ['mean', 'std', 'max', 'min'], cut_off_nan=True) 55 | filename = "perf_event_plot_{}".format("_".join(features)) 56 | plot_events(df, features, filename, from_directory) 57 | 58 | if __name__ == '__main__': 59 | pd.set_option('display.max_rows', 37) 60 | pd.set_option('display.max_columns', 15) 61 | pd.set_option('display.width', 200) 62 | 63 | parser = argparse.ArgumentParser(description="Plot an event counter.") 64 | parser.add_argument('--resultdir', dest='dir', type=str, help="Result directory of the profile run.", required=True) 65 | parser.add_argument('--features', dest='features', nargs='+', type=str, help="Which events to plot (add 'AVG.', 'STD.', 'MAX.' or 'MIN.' in front of the event name)", required=True) 66 | args = parser.parse_args() 67 | 68 | make_plot(args.dir, args.features) 69 | -------------------------------------------------------------------------------- /analyze/profile/stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Gathers and prints some information about the results.csv file from a profile run. 5 | Intended for quick sanity checking of the results. 
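
Usage:
    python3 analyze/profile/stats.py <data_input_dir>

Expects <data_input_dir>/results.csv and prints the events that only reported
zeroes as well as histograms of the recorded sample counts.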
6 | """ 7 | 8 | import sys, os 9 | import pandas as pd 10 | 11 | from ascii_graph import Pyasciigraph 12 | 13 | sys.path.insert(1, os.path.join(os.path.realpath(os.path.split(__file__)[0]), '..', "..")) 14 | from analyze.util import get_all_zero_events 15 | 16 | def histogram(L): 17 | d = {} 18 | for x in L: 19 | if x in d: 20 | d[x] += 1 21 | else: 22 | d[x] = 1 23 | return d 24 | 25 | 26 | def yield_cpu_sample_lengths(df): 27 | for idx in df.index.unique(): 28 | if not idx.startswith("uncore_"): 29 | yield len(df.loc[[idx], 'SAMPLE_VALUE']) 30 | 31 | def yield_uncore_sample_lengths(df): 32 | for idx in df.index.unique(): 33 | if idx.startswith("uncore_"): 34 | yield len(df.loc[[idx], 'SAMPLE_VALUE']) 35 | 36 | def samples_histogram(df, lengths_fn): 37 | lengths = histogram(lengths_fn(df)) 38 | data = [] 39 | for key, value in lengths.items(): 40 | data.append( ("%d samples" % key, value) ) 41 | data = sorted(data, key=lambda x: x[1]) 42 | return data 43 | 44 | if __name__ == '__main__': 45 | data_directory = sys.argv[1] 46 | df = pd.read_csv(os.path.join(data_directory, 'results.csv'), index_col=0, skipinitialspace=True) 47 | 48 | all_events = df.index.unique() 49 | all_zero = get_all_zero_events(df) 50 | 51 | print("Total Events measured:", len(all_events)) 52 | title = "List of event samples that reported only zeroes (%d / %d):" % (len(all_zero), len(all_events)) 53 | print('\n - '.join([title] + all_zero)) 54 | df = df.drop(all_zero) 55 | 56 | # Sample histogram 57 | graph = Pyasciigraph() 58 | for line in graph.graph('Recorded CPU samples histogram:', samples_histogram(df, yield_cpu_sample_lengths)): 59 | print(line) 60 | 61 | graph = Pyasciigraph() 62 | for line in graph.graph('Recorded uncore samples histogram:', samples_histogram(df, yield_uncore_sample_lengths)): 63 | print(line) 64 | 65 | print("The 25 events with fewest samples are:") 66 | for idx in sorted(df.index.unique(), key=lambda x: len(df.loc[[x], 'SAMPLE_VALUE']))[:25]: 67 | print(idx, ":", len(df.loc[[idx], 'SAMPLE_VALUE']), "samples") 68 | -------------------------------------------------------------------------------- /analyze/profile/timeseries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Compute the timeseries data as a CSV file. 
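
Usage:
    python3 analyze/profile/timeseries.py <data_input_dir>

Reads <data_input_dir>/results.csv and writes the aggregated time-series matrix
to <data_input_dir>/timeseries.csv.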
4 | """ 5 | 6 | import os 7 | import sys 8 | import pandas as pd 9 | import numpy as np 10 | import matplotlib 11 | 12 | 13 | if __name__ == "__main__": 14 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 15 | from analyze import util 16 | 17 | def timeseries_file(data_directory): 18 | timeseries = util.load_as_X(os.path.join(data_directory, 'results.csv'), aggregate_samples = ['mean', 'std', 'max', 'min'], cut_off_nan=True) 19 | timeseries_file = os.path.join(data_directory, 'timeseries.csv') 20 | timeseries.to_csv(timeseries_file) 21 | print("Generated timeseries.csv") 22 | 23 | def usage(progname): 24 | print('usage:', progname, '[data_input_dir]') 25 | sys.exit(0) 26 | 27 | if __name__ == '__main__': 28 | if len(sys.argv) > 2: 29 | usage(sys.argv[0]) 30 | timeseries_file(sys.argv[1]) 31 | 32 | -------------------------------------------------------------------------------- /analyze/util.py: -------------------------------------------------------------------------------- 1 | import math 2 | import pandas as pd 3 | import numpy as np 4 | 5 | READ_BANK_EVENTS = ["UNC_M_RD_CAS_RANK{}.BANK{}".format(i,j) for i in range(0,8) for j in range(0,8) ] 6 | WRITE_BANK_EVENTS = ["UNC_M_WR_CAS_RANK{}.BANK{}".format(i,j) for i in range(0,8) for j in range(0,8) ] 7 | 8 | def merge_bank_rank_events(df, minmax=False): 9 | matrix = pd.DataFrame(df) 10 | matrix.reset_index(inplace=True) 11 | pivot_table = matrix.pivot(index='INDEX', columns='EVENT_NAME', values='SAMPLE_VALUE') 12 | df = pivot_table 13 | 14 | read_rank_banks = pd.DataFrame() 15 | for e in READ_BANK_EVENTS: 16 | read_rank_banks[e] = df[e] 17 | 18 | write_rank_banks = pd.DataFrame() 19 | for e in WRITE_BANK_EVENTS: 20 | write_rank_banks[e] = df[e] 21 | 22 | merged_banks = pd.DataFrame() 23 | merged_banks['SUM.UNC_M_RD_CAS.*'] = read_rank_banks.sum(axis=1) 24 | merged_banks['STD.UNC_M_RD_CAS.*'] = read_rank_banks.std(axis=1, ddof=0) 25 | merged_banks['SUM.UNC_M_WR_CAS.*'] = write_rank_banks.sum(axis=1) 26 | merged_banks['STD.UNC_M_WR_CAS.*'] = write_rank_banks.std(axis=1, ddof=0) 27 | if minmax: 28 | merged_banks['MAX.UNC_M_WR_CAS.*'] = write_rank_banks.max(axis=1) 29 | merged_banks['MIN.UNC_M_WR_CAS.*'] = write_rank_banks.min(axis=1) 30 | merged_banks['MAX.UNC_M_RD_CAS.*'] = read_rank_banks.max(axis=1) 31 | merged_banks['MIN.UNC_M_RD_CAS.*'] = read_rank_banks.min(axis=1) 32 | #print(merged_banks) 33 | return merged_banks 34 | 35 | def add_metrics(df): 36 | matrix = pd.DataFrame(df) 37 | matrix.reset_index(inplace=True) 38 | pivot_table = matrix.pivot(index='INDEX', columns='EVENT_NAME', values='SAMPLE_VALUE') 39 | df = pivot_table 40 | 41 | metrics = pd.DataFrame() 42 | metrics['ENG.IPC'] = pivot_table['INST_RETIRED.ANY_P'] / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 43 | metrics['ENG.DSB_SWITCHES'] = pivot_table['DSB2MITE_SWITCHES.PENALTY_CYCLES'] / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 44 | metrics['ENG.MS_SWITCHES'] = 3 * pivot_table['IDQ.MS_SWITCHES'] / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 45 | metrics['ENG.L2_BOUND'] = (pivot_table['CYCLE_ACTIVITY.STALLS_L1D_PENDING'] - pivot_table['CYCLE_ACTIVITY.STALLS_L2_PENDING']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 46 | metrics['ENG.L3_HIT_FRACTION'] = pivot_table['MEM_LOAD_UOPS_RETIRED.LLC_HIT'] / (pivot_table['MEM_LOAD_UOPS_RETIRED.LLC_HIT']+7*pivot_table['MEM_LOAD_UOPS_RETIRED.LLC_MISS']) 47 | metrics['ENG.L3_BOUND'] = (metrics['ENG.L3_HIT_FRACTION'] * pivot_table['CYCLE_ACTIVITY.STALLS_L2_PENDING']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 48 | 
metrics['ENG.MEM_BOUND'] = ((1 - metrics['ENG.L3_HIT_FRACTION']) * pivot_table['CYCLE_ACTIVITY.STALLS_L2_PENDING']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 49 | #metrics['ENG.STALLS_MEM_ANY'] = pd.concat(pivot_table['CPU_CLK_UNHALTED.THREAD'], pivot_table['CYCLE_ACTIVITY.STALLS_L1D_PENDING']).min(axis=1) 50 | #metrics['ENG.STORES_BOUND'] = (pivot_table['RESOURCE_STALLS.SB'] - metrics['ENG.STALLS_MEM_ANY']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 51 | 52 | return metrics 53 | 54 | def aggregation_matrix(prefix, series, drop_bank_events=False): 55 | matrix = pd.DataFrame(series) 56 | matrix.reset_index(inplace=True) 57 | pivot_table = matrix.pivot(index='INDEX', columns='EVENT_NAME', values='SAMPLE_VALUE') 58 | if drop_bank_events: 59 | pivot_table.drop(READ_BANK_EVENTS, axis=1, inplace=True) 60 | pivot_table.drop(WRITE_BANK_EVENTS, axis=1, inplace=True) 61 | 62 | pivot_table.rename(columns=lambda x: "{}.{}".format(prefix, x), inplace=True) 63 | return pivot_table 64 | 65 | def load_as_X(f, aggregate_samples=['mean'], remove_zero=False, cut_off_nan=True): 66 | """ 67 | Transform CSV file into a matrix X (used for most ML inputs). 68 | The rows will be different times, the columns are the events. 69 | 70 | Keyword arguments: 71 | aggregate_samples -- Aggregate samples from all CPUs at time t. 72 | remove_zero -- Remove features that are all zero. 73 | cut_off_nan -- Remove everything after first NaN value is encountered. 74 | """ 75 | # Parse file 76 | raw_data = pd.read_csv(f, sep=',', skipinitialspace=True) 77 | raw_data.set_index(['EVENT_NAME'], inplace=True) 78 | raw_data.sort_index(inplace=True) 79 | 80 | # Remove events whose deltas are all 0: 81 | if remove_zero: 82 | raw_data = raw_data.drop(get_all_zero_events(raw_data)) 83 | 84 | # Convert time 85 | time_to_ms(raw_data) 86 | 87 | # Aggregate all event samples from the same event at time 88 | aggregates = [] 89 | drop_bank_events = 'rbmerge' in aggregate_samples or 'rbmerge2' in aggregate_samples or 'rbdrop' in aggregate_samples 90 | 91 | start_at = 0 92 | if aggregate_samples: 93 | grouped_df = raw_data.groupby(['EVENT_NAME', 'INDEX']) 94 | grouped_df_multiple = grouped_df.filter(lambda x: len(x) > 1).groupby(['EVENT_NAME', 'INDEX']) 95 | for agg in aggregate_samples: 96 | if agg == 'mean': 97 | series = grouped_df['SAMPLE_VALUE'].mean() 98 | aggregates.append(aggregation_matrix('AVG', series, drop_bank_events=drop_bank_events)) 99 | elif agg == 'std': 100 | series = grouped_df_multiple['SAMPLE_VALUE'].std(ddof=0) 101 | matrix = aggregation_matrix('STD', series, drop_bank_events=drop_bank_events) 102 | aggregates.append(matrix) 103 | elif agg == 'max': 104 | series = grouped_df_multiple['SAMPLE_VALUE'].max() 105 | aggregates.append(aggregation_matrix('MAX', series, drop_bank_events=drop_bank_events)) 106 | elif agg == 'min': 107 | series = grouped_df_multiple['SAMPLE_VALUE'].min() 108 | aggregates.append(aggregation_matrix('MIN', series, drop_bank_events=drop_bank_events)) 109 | elif agg == 'rbmerge': 110 | series = grouped_df['SAMPLE_VALUE'].mean() 111 | aggregates.append(merge_bank_rank_events(series)) 112 | elif agg == 'rbmerge2': 113 | series = grouped_df['SAMPLE_VALUE'].mean() 114 | aggregates.append(merge_bank_rank_events(series, minmax=True)) 115 | elif agg == 'cut1': 116 | start_at = 1 117 | elif agg == 'cut2': 118 | start_at = 2 119 | elif agg == 'cut4': 120 | start_at = 4 121 | elif agg == 'rbdrop': 122 | pass 123 | elif agg == 'metrics': 124 | series = grouped_df['SAMPLE_VALUE'].mean() 125 | 
aggregates.append(add_metrics(series)) 126 | else: 127 | assert "Unknown aggregation: {}. Supported are: [mean, std, max, min, rbmerge, cut1, cut2, cut4].".format(agg) 128 | df = pd.concat(aggregates, axis=1) 129 | 130 | # Cut off everything after first row with a NaN value 131 | if cut_off_nan: 132 | min_idx = minimum_nan_index(df) 133 | throw_away = df.shape[0]-min_idx 134 | if throw_away > df.shape[0] * (0.20): 135 | print("Throwing away {} out of {} samples for {}".format(throw_away, df.shape[0], f)) 136 | df = df[:min_idx] 137 | 138 | if "merge4" in aggregate_samples: 139 | # Aggregate 4 rows to get 1sec sampling time 140 | df.reset_index(inplace=True) 141 | df['MergeLabel'] = pd.Series([ math.ceil(i / 4.0) for i in range(1, len(df)+1) ]) 142 | df = df.groupby(['MergeLabel']).sum() 143 | 144 | if "merge2" in aggregate_samples: 145 | # Aggregate 2 rows to get 0.5sec sampling time 146 | df.reset_index(inplace=True) 147 | df['MergeLabel'] = pd.Series([ math.ceil(i / 2.0) for i in range(1, len(df)+1) ]) 148 | df = df.groupby(['MergeLabel']).sum() 149 | 150 | return df[start_at:] 151 | 152 | 153 | def minimum_nan_index(df): 154 | """ 155 | Return the earliest index that contains NaN over all columns or None 156 | if there are no NaN values in any columns. 157 | 158 | # Example 159 | For the following matrix it returns 1 as (1,1) is NaN: 160 | idx | EVENT1 EVENT2 EVENT3 .... EVENTN 161 | 0 | 12 9 5 12 162 | 1 | 1 NaN 2 5 163 | 2 | 0 NaN 100 12 164 | 3 | 0 NaN 1 99 165 | """ 166 | nans = pd.isnull(df).any(1).to_numpy().nonzero()[0] 167 | if len(nans) == 0: 168 | return df.shape[0] 169 | else: 170 | return min(nans) 171 | 172 | def get_zero_features_in_matrix(df): 173 | """ 174 | Given a pandas DataFrame loaded from a matrix_X*.csv file, 175 | return all columns (features) where the values are always zero. 176 | """ 177 | zero_events = [] 178 | for col in df: 179 | if not df[col].any(): 180 | # col.split(".", 1)[1] for getting event name 181 | zero_events.append(col) 182 | return zero_events 183 | 184 | def get_all_zero_events(df): 185 | """ 186 | Given a pandas DataFrame loaded from a results.csv file, 187 | return all event names where the counts are always 0 188 | """ 189 | event_names = [] 190 | for idx in df.index.unique(): 191 | if df.loc[idx, 'SAMPLE_VALUE'].sum() == 0: 192 | event_names.append(idx) 193 | return event_names 194 | 195 | def time_to_ms(df): 196 | """ 197 | Transforn the perf time (floating point, seconds) 198 | to miliseconds (absolute numbers) 199 | """ 200 | df['TIME'] = df['TIME'].map(lambda x: int(x * 1000)) 201 | -------------------------------------------------------------------------------- /doc/correlation_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/doc/correlation_heatmap.png -------------------------------------------------------------------------------- /doc/counters_vs_events.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/doc/counters_vs_events.png -------------------------------------------------------------------------------- /doc/manual.md: -------------------------------------------------------------------------------- 1 | # autoperf 2 | 3 | User manual, currently still under construction. 
4 | 5 | ## profile -- measure all the things 6 | 7 | ## aggregate -- combine results 8 | 9 | ## stats -- generate some stats about all events 10 | 11 | ## search -- finding undocumented events 12 | 13 | ## pair -- profiling pairwise combinations of programs 14 | 15 | A more advanced feature is the pairwise instrumentation of programs. 16 | Say you have a set of programs and you want to study their pairwise 17 | interactions with each other. You would first define a manifest like this: 18 | 19 | ``` 20 | [experiment] 21 | configurations = ["L3-SMT", "L3-SMT-cores"] 22 | 23 | [programA] 24 | name = "gcc" 25 | binary = "gcc" 26 | arguments = ["-j", "4", "out.c", "-o", "out"] 27 | 28 | [programB] 29 | name = "objdump" 30 | binary = "objdump" 31 | arguments = ["--disassemble", "/bin/true"] 32 | 33 | [programC] 34 | name = "cat" 35 | binary = "cat" 36 | arguments = ["/var/log/messages"] 37 | env = { LC_ALL = "C" } 38 | use_watch_repeat = true 39 | ``` 40 | 41 | After saving this as a file called `manifest.toml` in a folder called 42 | `pairings`, you could call `autoperf` with the following arguments: 43 | 44 | ``` 45 | $ autoperf pair ./pairings 46 | ``` 47 | 48 | This essentially does what the profile command does, but for every individual 49 | program defined in the manifest. In addition, it runs even more profile 50 | commands for programA while continuously running programB or programC in the 51 | background (once this is done, it does the same for programB and programC). 52 | 53 | If this is confusing and you want to get first-hand experience of what we would 54 | really be running here, you can also pass the `-d` argument to the pair 55 | sub-command. In this case, autoperf just prints a plan of what it would be 56 | doing, rather than launching any programs. 57 | 58 | ### Manifest settings 59 | 60 | The manifest format has a few configuration parameters. A full manifest file with 61 | all possible configurations and documentation in the comments is shown in 62 | `./tests/pair/manifest.toml`. 63 | 64 | * **configurations** is a list of possible mappings of programs to cores: 65 | * L1-SMT: Programs are placed on a single core, each gets one hyper-thread. 66 | * L3-SMT: Programs are placed on a single socket, each application gets one hyper-thread, interleaved (i.e., cores are shared between apps). 67 | * L3-SMT-cores: Programs are placed on a single socket, applications get a full core (i.e., hyper-threads are not shared between apps). 68 | * L3-cores: Programs are placed on a single socket, using a core per application but leaving the other hyper-thread idle. 69 | * Full-L3: Use the whole machine, each program is allocated an entire L3/socket and program threads allocate an entire core (hyper-threads are left idle). 70 | * Full-SMT-L3: Use the whole machine, programs allocate an entire L3/socket (hyper-threads are used). 71 | * Full-cores: Use the whole machine, programs use cores from all sockets interleaved (hyper-threads are left idle). 72 | * Full-SMT-cores: Use the whole machine, programs use cores from all sockets interleaved (hyper-threads are used).
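
If you have several identical machines, the `pair` sub-command's `--base` and `--step` flags (see `autoperf pair --help`) let you split the configurations of one manifest across them: `--base` skips the first x configurations and `--step` only evaluates every x-th one. A sketch for two machines (the exact indexing is an assumption here, so treat the numbers as an illustration):

```
$ autoperf pair -d ./pairings                  # print the plan without running anything
$ autoperf pair --base 0 --step 2 ./pairings   # machine 1
$ autoperf pair --base 1 --step 2 ./pairings   # machine 2
```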
73 | -------------------------------------------------------------------------------- /doc/perf_event_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/doc/perf_event_plot.png -------------------------------------------------------------------------------- /doc/timeseries.csv: -------------------------------------------------------------------------------- 1 | INDEX,AVG.ARITH.DIVIDER_ACTIVE,AVG.BACLEARS.ANY,AVG.BR_INST_RETIRED.ALL_BRANCHES,AVG.BR_INST_RETIRED.ALL_BRANCHES_PEBS,AVG.BR_MISP_RETIRED.ALL_BRANCHES,AVG.BR_MISP_RETIRED.ALL_BRANCHES_PEBS,AVG.CORE_POWER.LVL0_TURBO_LICENSE,AVG.CORE_POWER.LVL1_TURBO_LICENSE,AVG.CORE_POWER.LVL2_TURBO_LICENSE,AVG.CORE_POWER.THROTTLE,AVG.CORE_SNOOP_RESPONSE.RSP_IFWDFE,AVG.CORE_SNOOP_RESPONSE.RSP_IFWDM,AVG.CORE_SNOOP_RESPONSE.RSP_IHITFSE,AVG.CORE_SNOOP_RESPONSE.RSP_IHITI,AVG.CORE_SNOOP_RESPONSE.RSP_SFWDFE,AVG.CORE_SNOOP_RESPONSE.RSP_SFWDM,AVG.CORE_SNOOP_RESPONSE.RSP_SHITFSE 2 | 0,5727.107143,440.3035714,8074.25,6999.678571,214.0892857,2807.660714,245537.875,0,0,0,25.07142857,43.85714286,41.28571429,84.51785714,4.589285714,19.69642857,0.017857143 3 | 1,248.8392857,489.3214286,5429.589286,112373.8214,299.6607143,249.0714286,758765.2857,0,0,2828.75,16.46428571,37.67857143,39.89285714,328.3035714,0.464285714,15.57142857,0.660714286 4 | 2,349.6964286,1042.678571,4930.071429,6325.285714,267.5178571,241.0714286,420077.875,0,0,575.5,11.78571429,58.64285714,32.69642857,2207.553571,2.75,21.28571429,0 5 | 3,187.5357143,955.8392857,4236.589286,13601.94643,3865.125,299.75,359782.9107,0,0,1371.857143,11.71428571,1783.821429,1780.035714,19.21428571,0.928571429,16.01785714,2.25 6 | 4,279.75,2824.25,114628.8036,4538.732143,206.5892857,349.9285714,387673.6964,0,0,1884.25,147.3035714,36.08928571,27.23214286,17.98214286,364.7321429,1095.642857,0 7 | 5,201.3214286,664.4285714,7428.482143,5686.553571,295.1071429,234.8214286,1428578.696,0,0,2118.392857,763.2678571,1215.232143,26.19642857,10.51785714,0.607142857,21.83928571,0.107142857 8 | 6,4375.625,668.3571429,11369.17857,4829.142857,546.6428571,2416.357143,284930.3929,0,0,1992.5,12.64285714,34.23214286,29.78571429,11.96428571,1.678571429,16.58928571,0 9 | 7,191.5178571,799.375,5276.285714,113554.2857,626.6607143,260.5892857,294178.4107,0,0,2813.214286,14.875,39.78571429,27.64285714,8.071428571,2.517857143,15.69642857,0.071428571 -------------------------------------------------------------------------------- /src/aggregate.rs: -------------------------------------------------------------------------------- 1 | use csv; 2 | use log::*; 3 | use std::collections::HashMap; 4 | use std::collections::HashSet; 5 | use std::fs; 6 | use std::fs::{File, Metadata}; 7 | use std::io; 8 | use std::io::prelude::*; 9 | use std::path::Path; 10 | use std::path::PathBuf; 11 | use std::process; 12 | use std::str::FromStr; 13 | use toml; 14 | 15 | use crate::util::*; 16 | 17 | use perfcnt::linux::perf_file::PerfFile; 18 | use perfcnt::linux::perf_format::{EventData, EventDesc, EventType}; 19 | 20 | // I have no idea if the perf format guarantees that events appear always in the same order :S 21 | fn verify_events_in_order(events: &Vec, values: &Vec<(u64, Option)>) -> bool { 22 | for (idx, v) in values.iter().enumerate() { 23 | // Don't have id's we can't veryify anything 24 | if v.1.is_none() { 25 | warn!( 26 | "Don't have IDs with the sample values, so we can't tell which event a sample \ 27 | belongs to." 
28 | ); 29 | return true; 30 | } 31 | 32 | let id: u64 = v.1.unwrap_or(0); 33 | if !events.get(idx).map_or(false, |ev| ev.ids.contains(&id)) { 34 | return false; 35 | } 36 | } 37 | 38 | return true; 39 | } 40 | 41 | /// Extracts the perf stat file and writes it to a CSV file that looks like this: 42 | /// "EVENT_NAME", "TIME", "SOCKET", "CORE", "CPU", "NODE", "UNIT", "SAMPLE_VALUE" 43 | fn parse_perf_csv_file( 44 | mt: &MachineTopology, 45 | cpus: &Vec<&CpuInfo>, 46 | cpu_filter: Filter, 47 | sockets: &Vec, 48 | breakpoints: &Vec, 49 | path: &Path, 50 | writer: &mut csv::Writer, 51 | ) -> io::Result<()> { 52 | // Check if it's a file: 53 | let meta: Metadata = fs::metadata(path)?; 54 | if !meta.file_type().is_file() { 55 | error!("Not a file {:?}", path); 56 | } 57 | 58 | let mut erronous_events: HashMap = HashMap::new(); 59 | type OutputRow = (String, String, Socket, Core, Cpu, Node, String, u64); 60 | let mut parsed_rows: Vec = Vec::with_capacity(5000); 61 | 62 | // All the sockets this program is running on: 63 | let mut all_sockets: Vec = cpus.iter().map(|c| c.socket).collect(); 64 | all_sockets.sort(); 65 | all_sockets.dedup(); 66 | 67 | // Timestamps for filtering start and end: 68 | let mut start: Option = None; 69 | let mut end: Option = None; 70 | 71 | let mut rdr = csv::Reader::from_file(path) 72 | .unwrap() 73 | .has_headers(false) 74 | .delimiter(b';') 75 | .flexible(true); 76 | for record in rdr.decode() { 77 | if record.is_ok() { 78 | type SourceRow = (f64, String, String, String, String, String, f64); 79 | let (time, cpu, value_string, _, event, _, percent): SourceRow = 80 | record.expect("Should not happen (in is_ok() branch)!"); 81 | 82 | // Perf will just report first CPU on the socket for uncore events, 83 | // so we temporarily encode the location in the event name and 84 | // extract it here again: 85 | let (unit, event_name) = if !event.starts_with("uncore_") { 86 | // Normal case, we just take the regular event and cpu fields from perf stat 87 | (String::from("cpu"), String::from(event.trim())) 88 | } else { 89 | // Uncore events, use first part of the event name as the location 90 | let (unit, name) = event.split_at(event.find(".").unwrap()); 91 | ( 92 | String::from(unit), 93 | String::from(name.trim_start_matches(".").trim()), 94 | ) 95 | }; 96 | 97 | if erronous_events.contains_key(&event_name) { 98 | // Skip already reported, bad events 99 | continue; 100 | } 101 | 102 | if !cpu.starts_with("CPU") { 103 | error!( 104 | "{:?}: Unkown CPU value {}, skipping this row.", 105 | path.as_os_str(), 106 | cpu 107 | ); 108 | continue; 109 | } 110 | 111 | let cpu_nr = match u64::from_str(&cpu[3..].trim()) { 112 | Ok(v) => v, 113 | Err(_e) => { 114 | error!( 115 | "{:?}: CPU value is not a number '{}', skipping this row.", 116 | path.as_os_str(), 117 | cpu 118 | ); 119 | continue; 120 | } 121 | }; 122 | let cpuinfo: &CpuInfo = mt 123 | .cpu(cpu_nr) 124 | .expect("Invalid CPU number (check run.toml or lspcu.csv)"); 125 | 126 | if value_string.trim() == "" { 127 | warn!( 128 | "{:?}: Event '{}' was not counted. This is a bug, please report it!", 129 | path.as_os_str(), 130 | event_name 131 | ); 132 | erronous_events.insert(event_name.clone(), true); 133 | continue; 134 | } 135 | if value_string.trim() == "" { 136 | warn!( 137 | "{:?}: Event '{}' was not measured correctly with perf. 
This is a bug, please report it!", 138 | path.as_os_str(), 139 | event_name 140 | ); 141 | erronous_events.insert(event_name.clone(), true); 142 | continue; 143 | } 144 | if percent < 91.0 { 145 | error!( 146 | "{:?}: has multiplexed event '{}'. This is a bug, please report it!", 147 | path.as_os_str(), 148 | event_name 149 | ); 150 | erronous_events.insert(event_name.clone(), true); 151 | continue; 152 | } 153 | 154 | let value = u64::from_str(value_string.trim()).expect( 155 | format!("Parsed string '{}' should be a value by now!", value_string).as_str(), 156 | ); 157 | 158 | if breakpoints.len() >= 1 159 | && value == 1 160 | && event_name.ends_with(breakpoints[0].as_str()) 161 | && cpus.iter().any(|c| c.cpu == cpu_nr) 162 | { 163 | if start.is_some() { 164 | error!( 165 | "{:?}: Start breakpoint ({:?}) triggered multiple times.", 166 | path.as_os_str(), 167 | breakpoints[0] 168 | ); 169 | } 170 | start = Some(time) 171 | } 172 | if breakpoints.len() >= 2 173 | && value == 1 174 | && event_name.ends_with(breakpoints[1].as_str()) 175 | && cpus.iter().any(|c| c.cpu == cpu_nr) 176 | { 177 | if end.is_some() { 178 | warn!( 179 | "{:?}: End breakpoint ({:?}) triggered multiple times. Update end \ 180 | breakpoint.", 181 | path.as_os_str(), 182 | breakpoints[1] 183 | ); 184 | } 185 | end = Some(time) 186 | } 187 | 188 | parsed_rows.push(( 189 | event_name, 190 | time.to_string(), 191 | cpuinfo.socket, 192 | cpuinfo.core, 193 | cpu_nr, 194 | cpuinfo.node.node, 195 | unit, 196 | value, 197 | )); 198 | } else { 199 | // Ignore lines that start with # (comments) but fail in case another 200 | // line can not be parsed: 201 | match record.unwrap_err() { 202 | csv::Error::Decode(s) => { 203 | if !s.starts_with("Failed converting '#") { 204 | panic!("Can't decode line {}.", s) 205 | } 206 | } 207 | e => panic!("Unrecoverable error {} while decoding.", e), 208 | }; 209 | } 210 | } 211 | if breakpoints.len() >= 1 && start.is_none() { 212 | error!( 213 | "{:?}: We did not find a trigger for start breakpoint ({:?})", 214 | path.as_os_str(), 215 | breakpoints[0] 216 | ); 217 | } 218 | if breakpoints.len() == 2 && end.is_none() { 219 | warn!( 220 | "{:?}: We did not find a trigger for end breakpoint ({:?})", 221 | path.as_os_str(), 222 | breakpoints[1] 223 | ); 224 | } 225 | if breakpoints.len() == 2 && end.is_some() && start.is_some() { 226 | let start_s = start.unwrap_or(0.0); 227 | let end_s = end.unwrap_or(0.0); 228 | if end_s <= start_s { 229 | error!( 230 | "{:?}: End breakpoint is before start breakpoint ({:?} -- {:?})", 231 | path.as_os_str(), 232 | start, 233 | end 234 | ); 235 | } else if (end_s - start_s) < 1.0 { 236 | warn!("Region of interest very short ({} s)", end_s - start_s); 237 | } 238 | } 239 | 240 | let mut current_index = 0; 241 | let mut time_to_index: HashMap = HashMap::new(); 242 | let mut is_recording: bool = start.is_none(); 243 | let start = start.map(|s| s.to_string()); 244 | let end = end.map(|s| s.to_string()); 245 | for r in parsed_rows { 246 | let (event_name, time, socket, core, cpu, node, unit, value): OutputRow = r; 247 | 248 | // Skip all events before we have the breakpoint 249 | is_recording = match start { 250 | Some(ref start_time) => is_recording || time == start_time.as_str(), 251 | None => true, 252 | }; 253 | is_recording = match end { 254 | Some(ref end_time) => is_recording && time != end_time.as_str(), 255 | None => true, 256 | }; 257 | if !is_recording { 258 | continue; 259 | } 260 | 261 | if erronous_events.contains_key(&event_name) { 262 | // We do two 
passes here because we may get an erronous event only 263 | // at a later point in time in the CSV file 264 | // (when we already parsed this event a few times) 265 | continue; 266 | } 267 | 268 | if breakpoints.len() > 2 269 | && (event_name.contains(breakpoints[0].as_str()) 270 | || event_name.contains(breakpoints[1].as_str())) 271 | { 272 | // We don't need to breakpoints in the resulting CSV file 273 | continue; 274 | } 275 | 276 | // Skip all events that we don't want to attribute fully to our program: 277 | let include = if unit.trim() == "cpu" { 278 | match cpu_filter { 279 | Filter::All => true, 280 | Filter::Exclusive => cpus.iter().any(|c| c.cpu == cpu), 281 | Filter::Shared => all_sockets.contains(&socket), 282 | Filter::None => false, 283 | } 284 | } else if unit.starts_with("uncore") { 285 | sockets.contains(&socket) 286 | } else { 287 | error!("Unkown unit '{}', not included!", unit); 288 | false 289 | }; 290 | 291 | if !include { 292 | // Skip this event 293 | continue; 294 | } 295 | 296 | if !time_to_index.contains_key(&time) { 297 | time_to_index.insert(time.clone(), current_index); 298 | current_index += 1; 299 | } 300 | 301 | writer 302 | .encode(&[ 303 | event_name.as_str(), 304 | format!("{}", *time_to_index.get(&time).unwrap()).as_str(), 305 | time.as_str(), 306 | socket.to_string().as_str(), 307 | core.to_string().as_str(), 308 | cpu.to_string().as_str(), 309 | node.to_string().as_str(), 310 | unit.as_str(), 311 | value.to_string().as_str(), 312 | ]) 313 | .unwrap(); 314 | } 315 | 316 | Ok(()) 317 | } 318 | 319 | /// Extracts the data and writes it to a CSV file that looks like this: 320 | /// "EVENT_NAME", "INDEX", "TIME", "SOCKET", "CORE", "CPU", "NODE", "UNIT", "SAMPLE_VALUE" 321 | fn parse_perf_file( 322 | path: &Path, 323 | event_names: Vec<&str>, 324 | writer: &mut csv::Writer, 325 | ) -> io::Result<()> { 326 | // Check if it's a file: 327 | let meta: Metadata = fs::metadata(path)?; 328 | if !meta.file_type().is_file() { 329 | error!("Not a file {:?}", path); 330 | } 331 | // TODO: Should just pass Path to PerfFile 332 | let mut file = File::open(path)?; 333 | let mut buf: Vec = Vec::with_capacity(meta.len() as usize); 334 | file.read_to_end(&mut buf)?; 335 | let pf = PerfFile::new(buf); 336 | 337 | // debug!("GroupDescriptions: {:?}", pf.get_group_descriptions()); 338 | // debug!("EventDescription: {:?}", pf.get_event_description()); 339 | 340 | let event_desc = pf.get_event_description().unwrap(); 341 | let event_info: Vec<(&EventDesc, &&str)> = event_desc.iter().zip(event_names.iter()).collect(); 342 | // debug!("Event Infos: {:?}", event_info); 343 | 344 | for e in pf.data() { 345 | if e.header.event_type != EventType::Sample { 346 | continue; 347 | } 348 | 349 | match e.data { 350 | EventData::Sample(rec) => { 351 | // println!("{:?}", rec); 352 | let time = format!("{}", rec.time.unwrap()); 353 | let ptid = rec.ptid.unwrap(); 354 | let _pid = format!("{}", ptid.pid); 355 | let _tid = format!("{}", ptid.tid); 356 | let cpu = format!("{}", rec.cpu.unwrap().cpu); 357 | // let ip = format!("0x{:x}", rec.ip.unwrap()); 358 | 359 | let v = rec.v.unwrap(); 360 | assert!(verify_events_in_order(&event_desc, &v.values)); 361 | // TODO: verify event names match EventDesc in `event_info`! 
362 | 363 | for reading in v.values.iter() { 364 | let (event_count, maybe_id) = *reading; 365 | let id = maybe_id.unwrap(); 366 | let &(_, name) = event_info.iter().find(|ev| ev.0.ids.contains(&id)).unwrap(); 367 | let sample_value = format!("{}", event_count); 368 | 369 | writer 370 | .encode(&[name, time.as_str(), cpu.as_str(), sample_value.as_str()]) 371 | .unwrap(); 372 | } 373 | } 374 | _ => unreachable!("Should not happen"), 375 | } 376 | } 377 | 378 | Ok(()) 379 | } 380 | 381 | #[derive(Debug, Eq, PartialEq, Clone, Copy)] 382 | enum Filter { 383 | All, 384 | Exclusive, 385 | Shared, 386 | None, 387 | } 388 | 389 | impl Filter { 390 | fn new(what: &str) -> Filter { 391 | match what { 392 | "all" => Filter::All, 393 | "exclusive" => Filter::Exclusive, 394 | "shared" => Filter::Shared, 395 | "none" => Filter::None, 396 | _ => panic!("clap-rs should ensure nothing else is passed..."), 397 | } 398 | } 399 | } 400 | 401 | pub fn aggregate(path: &Path, cpu_filter: &str, uncore_filter: &str, save_to: &Path) { 402 | if !path.exists() { 403 | error!("Input directory does not exist {:?}", path); 404 | process::exit(1); 405 | } 406 | 407 | let mut lscpu_file: PathBuf = path.to_path_buf(); 408 | lscpu_file.push("lscpu.csv"); 409 | let mut numactl_file: PathBuf = path.to_path_buf(); 410 | numactl_file.push("numactl.dat"); 411 | let mt = MachineTopology::from_files(&lscpu_file, &numactl_file); 412 | 413 | let mut run_config: PathBuf = path.to_path_buf(); 414 | run_config.push("run.toml"); 415 | 416 | let (cpus, breakpoints) = if run_config.as_path().exists() { 417 | let mut file = File::open(run_config.as_path()).expect("run.toml file does not exist?"); 418 | let mut run_string = String::new(); 419 | let _ = file.read_to_string(&mut run_string).unwrap(); 420 | let mut parser = toml::Parser::new(run_string.as_str()); 421 | let doc = match parser.parse() { 422 | Some(doc) => doc, 423 | None => { 424 | error!("Can't parse the run.toml file:\n{:?}", parser.errors); 425 | process::exit(3); 426 | } 427 | }; 428 | 429 | let a: &toml::Table = doc["a"] 430 | .as_table() 431 | .expect("run.toml: 'a' should be a table."); 432 | let deployment: &toml::Table = doc 433 | .get("deployment") 434 | .expect("deployment?") 435 | .as_table() 436 | .expect("run.toml: 'a.deployment' should be a table."); 437 | let cpus: Vec = deployment 438 | .get("a") 439 | .expect("deployment.a") 440 | .as_slice() 441 | .expect("run.tom: 'a.deployment.a' should be an array") 442 | .iter() 443 | .map(|c| { 444 | c.as_table().expect("table")["cpu"] 445 | .as_integer() 446 | .expect("int") as u64 447 | }) 448 | .collect(); 449 | let breakpoints: Vec = a 450 | .get("breakpoints") 451 | .expect("no breakpoints?") 452 | .as_slice() 453 | .expect("breakpoints not an array?") 454 | .iter() 455 | .map(|s| s.as_str().expect("breakpoint not a string?").to_string()) 456 | .collect(); 457 | 458 | (cpus, breakpoints) 459 | } else { 460 | debug!("Couldn't find a run.toml, we include counter values from all CPUs and sockets"); 461 | let cpus: Vec = mt.cores(); 462 | // No breakpoints 463 | let breakpoint: Vec = Vec::new(); 464 | (cpus, breakpoint) 465 | }; 466 | 467 | // All the CPUs this program is (exclusively) running on: 468 | let all_cpus: Vec<&CpuInfo> = cpus 469 | .into_iter() 470 | .map(|c| { 471 | mt.cpu(c) 472 | .expect("Invalid CPU in run.toml or wrong lscpu.csv?") 473 | }) 474 | .collect(); 475 | 476 | // All the sockets this program is running on: 477 | let mut all_sockets: Vec = all_cpus.iter().map(|c| c.socket).collect(); 478 | 
all_sockets.sort(); 479 | all_sockets.dedup(); 480 | 481 | let uncore_filter = Filter::new(uncore_filter); 482 | let cpu_filter = Filter::new(cpu_filter); 483 | 484 | let mut considered_sockets: Vec = Vec::new(); 485 | // Find out if we should include the uncore events for every socket that we're running on 486 | match uncore_filter { 487 | Filter::Exclusive => { 488 | for socket in all_sockets.into_iter() { 489 | let socket_set: HashSet = 490 | mt.cpus_on_socket(socket).iter().map(|c| c.cpu).collect(); 491 | let program_set: HashSet = all_cpus.iter().map(|c| c.cpu).collect(); 492 | let diff: Vec = socket_set.difference(&program_set).cloned().collect(); 493 | 494 | if diff.len() == 0 { 495 | debug!( 496 | "Uncore from socket {:?} considered since A uses it exclusively.", 497 | socket 498 | ); 499 | considered_sockets.push(socket); 500 | } 501 | } 502 | } 503 | Filter::All => considered_sockets.append(&mut mt.sockets()), 504 | Filter::Shared => { 505 | debug!( 506 | "Uncore from sockets {:?} added since A uses these sockets at least partially.", 507 | all_sockets 508 | ); 509 | considered_sockets.append(&mut all_sockets); 510 | } 511 | Filter::None => debug!("Ignore all uncore events."), 512 | }; 513 | 514 | // Read perf.csv file: 515 | let mut csv_data: PathBuf = path.to_owned(); 516 | csv_data.push("perf.csv"); 517 | let csv_data_path = csv_data.as_path(); 518 | if !csv_data_path.exists() { 519 | error!("File not found: {:?}", csv_data_path); 520 | return; 521 | } 522 | type Row = (String, String, String, String, String, String); 523 | let mut rdr = csv::Reader::from_file(csv_data_path).unwrap(); 524 | let rows = rdr.decode().collect::>>().unwrap(); 525 | 526 | // Create result.csv file: 527 | let csv_result: PathBuf = save_to.to_owned(); 528 | let mut wrtr = csv::Writer::from_file(csv_result.as_path()).unwrap(); 529 | wrtr.encode(&[ 530 | "EVENT_NAME", 531 | "INDEX", 532 | "TIME", 533 | "SOCKET", 534 | "CORE", 535 | "CPU", 536 | "NODE", 537 | "UNIT", 538 | "SAMPLE_VALUE", 539 | ]) 540 | .unwrap(); 541 | 542 | // Write content in result.csv 543 | for row in rows { 544 | let (_, event_names, _, _, file, _) = row; 545 | let _string_names: Vec<&str> = event_names.split(",").collect(); 546 | 547 | let mut perf_data = path.to_owned(); 548 | perf_data.push(&file); 549 | 550 | let file_ext = perf_data 551 | .extension() 552 | .expect("File does not have an extension"); 553 | match file_ext.to_str().unwrap() { 554 | "data" => parse_perf_file( 555 | perf_data.as_path(), 556 | event_names.split(",").collect(), 557 | &mut wrtr, 558 | ) 559 | .unwrap(), 560 | "csv" => parse_perf_csv_file( 561 | &mt, 562 | &all_cpus, 563 | cpu_filter, 564 | &considered_sockets, 565 | &breakpoints, 566 | perf_data.as_path(), 567 | &mut wrtr, 568 | ) 569 | .unwrap(), 570 | _ => panic!("Unknown file extension, I can't parse this."), 571 | }; 572 | } 573 | 574 | info!("Merging completed"); 575 | } 576 | -------------------------------------------------------------------------------- /src/cmd.yml: -------------------------------------------------------------------------------- 1 | name: autoperf 2 | version: "0.2" 3 | author: Gerd Zellweger 4 | about: Use performance counters with little domain knowledge. 5 | args: 6 | - verbose: 7 | short: v 8 | multiple: true 9 | help: Do more verbose logging (-v = info, -vv = debug, -vvv = trace, default is warn). 10 | subcommands: 11 | - profile: 12 | settings: 13 | - TrailingVarArg 14 | about: Measure all available H/W events for a command using `perf stat`. 
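      # Illustrative usage (the profiled command is a placeholder, not part of this config):
      #   autoperf profile -o out ./my_benchmark
      # The recorded counters can then be merged with `autoperf aggregate out`.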
15 | args: 16 | - dryrun: 17 | short: d 18 | long: dryrun 19 | help: Don't run anything, just print what we would do. 20 | - output: 21 | short: o 22 | long: output 23 | help: Set output directory. 24 | value_name: DIRECTORY 25 | takes_value: true 26 | - COMMAND: 27 | help: Command to execute. 28 | required: true 29 | multiple: true 30 | - pair: 31 | about: Run a set of programs pairwise together on the machine (while measuring all counters). 32 | args: 33 | - dryrun: 34 | short: d 35 | long: dryrun 36 | help: Don't run anything, just print what we would do. 37 | - start: 38 | short: b 39 | long: base 40 | help: Skip the first x configuration (used to evaluate the same manifest simultaneously on multiple identical machines). 41 | takes_value: true 42 | - step: 43 | short: s 44 | long: step 45 | help: Only evaluate every x-th configuration (used to evaluate the same manifest simultaneously on multiple identical machines). 46 | takes_value: true 47 | - directory: 48 | help: Path of manifest directory. 49 | required: true 50 | - aggregate: 51 | about: Merge counter measurements generated with the `profile` or `pair` command in a single CSV file. 52 | args: 53 | - core: 54 | short: c 55 | long: core 56 | takes_value: true 57 | possible_values: [ all, shared, exclusive, none ] 58 | help: Which core events to include (default all, exclusive = only cores where program was running on, none = drop core events). 59 | - uncore: 60 | short: u 61 | long: uncore 62 | takes_value: true 63 | possible_values: [ all, shared, exclusive, none ] 64 | help: Which uncore events to include (default all, exclusive = only uncore units that are exclusively used by program, none = drop uncore events). 65 | - output: 66 | short: o 67 | long: output 68 | help: Set output file (defaults to /results.csv). 69 | value_name: FILENAME 70 | takes_value: true 71 | - directory: 72 | help: Set input directory. 73 | required: true 74 | - stats: 75 | about: Dump statistics about performance events on Intel architectures. 76 | args: 77 | - directory: 78 | help: Where to store the generated output (CSV) files. 79 | required: true 80 | - search: 81 | about: Find performance events potentially not covered in Intel publicly available manuals. 82 | 83 | # 84 | # We don't support these two subcommands at the moment: 85 | # 86 | 87 | # - scale: 88 | # settings: 89 | # - TrailingVarArg 90 | # about: Measure all available H/W events for an application using `perf stat`. Do multiple runs and increase the number of threads. 91 | # args: 92 | # - debug: 93 | # short: d 94 | # long: debug 95 | # help: Print debug information verbosely. 96 | # - dryrun: 97 | # short: r 98 | # long: dryrun 99 | # help: Don't run anything, just print what it would do. 100 | # - directory: 101 | # help: Path of manifest directory. 102 | # required: true 103 | 104 | # - mkgroup: 105 | # settings: 106 | # - TrailingVarArg 107 | # about: Given a machine, form the largest possible group of events from a given ranking input file. 108 | # args: 109 | # - file: 110 | # short: i 111 | # long: input 112 | # help: Input file (weka ranking). 
113 | # value_name: FILE 114 | # takes_value: true 115 | -------------------------------------------------------------------------------- /src/counters.toml: -------------------------------------------------------------------------------- 1 | [broadwell] 2 | family = 6 3 | models = [61, 71] 4 | fixed_counters = { CPU = 3 } 5 | programmable_counters = { CPU = 4, ARB = 2, CBO = 2 } 6 | 7 | [broadwellX] 8 | family = 6 9 | models = [79, 86] 10 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 11 | programmable_counters = { CPU = 4, UBO = 2, CBO = 4, HA = 4, IMC = 4, IRP = 4, PCU = 4, QPI = 4, R2PCIE = 4, R3QPI = 3, SBO = 4 } 12 | 13 | [haswell] 14 | family = 6 15 | models = [60, 69, 70] 16 | fixed_counters = { CPU = 3 } 17 | programmable_counters = { CPU = 4, ARB = 2, CBO = 2 } 18 | 19 | [haswellX] 20 | family = 6 21 | models = [63] 22 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 23 | programmable_counters = { CPU = 4, UBO = 2, CBO = 4, HA = 4, IMC = 4, IRP = 4, PCU = 4, QPI = 4, R2PCIE = 4, R3QPI = 3, SBO = 4 } 24 | 25 | [ivybridge] 26 | family = 6 27 | models = [58] 28 | fixed_counters = { CPU = 3 } 29 | programmable_counters = { CPU = 4, ARB = 2, CBO = 2 } 30 | 31 | [ivybridgeep] 32 | family = 6 33 | models = [62] 34 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 35 | programmable_counters = { CPU = 4, UBO = 2, CBO = 4, HA = 4, IMC = 4, IRP = 4, PCU = 4, QPI = 4, R2PCIE = 4, R3QPI = 3, SBO = 4 } 36 | 37 | [skylake] 38 | family = 6 39 | models = [78, 94, 142, 158] 40 | fixed_counters = { CPU = 3 } 41 | programmable_counters = { CPU = 4, CBO = 2, ARB = 2 } 42 | 43 | [skylakeX] 44 | family = 6 45 | models = [85] 46 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 47 | programmable_counters = { CPU = 4, UPI = 4, CHA = 4, IIO = 4, IMC = 4, IRP = 4, M2M = 4, M3UPI = 4, PCU = 4, UBO = 2, CBO = 4 } 48 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::{load_yaml, App}; 2 | use std::path::{Path, PathBuf}; 3 | use std::str::FromStr; 4 | 5 | mod aggregate; 6 | mod mkgroup; 7 | mod pair; 8 | mod profile; 9 | mod scale; 10 | mod search; 11 | mod stats; 12 | mod util; 13 | use log::*; 14 | 15 | use aggregate::aggregate; 16 | use pair::pair; 17 | use profile::profile; 18 | use stats::stats; 19 | 20 | use mkgroup::mkgroup; 21 | use search::print_unknown_events; 22 | 23 | fn setup_logging(lvl: &str) { 24 | use env_logger::Env; 25 | env_logger::from_env(Env::default().default_filter_or(lvl)).init(); 26 | } 27 | 28 | fn main() { 29 | let yaml = load_yaml!("cmd.yml"); 30 | let matches = App::from_yaml(yaml).get_matches(); 31 | 32 | let level = match matches.occurrences_of("verbose") { 33 | 0 => "warn", 34 | 1 => "info", 35 | 2 => "debug", 36 | 3 => "trace", 37 | _ => "trace", 38 | }; 39 | setup_logging(level); 40 | 41 | if let Some(matches) = matches.subcommand_matches("profile") { 42 | let output_path = Path::new(matches.value_of("output").unwrap_or("out")); 43 | let cmd: Vec = matches 44 | .values_of("COMMAND") 45 | .unwrap() 46 | .map(|s| s.to_string()) 47 | .collect(); 48 | 49 | let dryrun: bool = matches.is_present("dryrun"); 50 | profile( 51 | output_path, 52 | ".", 53 | cmd, 54 | Default::default(), 55 | Default::default(), 56 | false, 57 | None, 58 | dryrun, 59 | ); 60 | } 61 | if let Some(matches) = matches.subcommand_matches("aggregate") { 62 | let input_directory = Path::new(matches.value_of("directory").unwrap_or("out")); 63 | let output_path: PathBuf = 
match matches.value_of("output") { 64 | Some(v) => PathBuf::from(v), 65 | None => { 66 | let mut pb = input_directory.to_path_buf(); 67 | pb.push("results.csv"); 68 | pb 69 | } 70 | }; 71 | let uncore_filter: &str = matches.value_of("uncore").unwrap_or("all"); 72 | let core_filter: &str = matches.value_of("core").unwrap_or("all"); 73 | 74 | aggregate( 75 | input_directory, 76 | core_filter, 77 | uncore_filter, 78 | &output_path.as_path(), 79 | ); 80 | } 81 | if let Some(matches) = matches.subcommand_matches("pair") { 82 | let output_path = Path::new(matches.value_of("directory").unwrap_or("out")); 83 | let start: usize = usize::from_str(matches.value_of("start").unwrap_or("0")).unwrap_or(0); 84 | let stepping: usize = usize::from_str(matches.value_of("step").unwrap_or("1")).unwrap_or(1); 85 | if stepping == 0 { 86 | error!("skip amount must be > 0"); 87 | std::process::exit(1); 88 | } 89 | 90 | let dryrun: bool = matches.is_present("dryrun"); 91 | pair(output_path, dryrun, start, stepping); 92 | } 93 | if let Some(matches) = matches.subcommand_matches("scale") { 94 | let _output_path = Path::new(matches.value_of("directory").unwrap_or("out")); 95 | let _dryrun: bool = matches.is_present("dryrun"); 96 | // scale(output_path, dryrun); 97 | } 98 | if let Some(matches) = matches.subcommand_matches("stats") { 99 | let output_path = Path::new(matches.value_of("directory").unwrap_or("out")); 100 | stats(output_path); 101 | } 102 | if let Some(_matches) = matches.subcommand_matches("search") { 103 | print_unknown_events(); 104 | } 105 | if let Some(matches) = matches.subcommand_matches("mkgroup") { 106 | let ranking_file = Path::new(matches.value_of("file").unwrap_or("notfound")); 107 | mkgroup(ranking_file); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/mkgroup.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use std::path::Path; 4 | 5 | use csv; 6 | use phf::Map; 7 | 8 | use super::profile::{MonitoringUnit, PerfEvent, PerfEventGroup}; 9 | use log::*; 10 | use x86::perfcnt::intel::{events, EventDescription}; 11 | 12 | pub fn mkgroup(ranking_file: &Path) { 13 | let core_counter: &'static Map<&'static str, EventDescription<'static>> = &events().unwrap(); 14 | let uncore_counter: &'static Map<&'static str, EventDescription<'static>> = &events().unwrap(); 15 | 16 | let mut res = HashMap::with_capacity(11); 17 | res.insert(MonitoringUnit::CPU, 4); 18 | res.insert(MonitoringUnit::UBox, 2); 19 | res.insert(MonitoringUnit::CBox, 4); 20 | res.insert(MonitoringUnit::HA, 4); 21 | res.insert(MonitoringUnit::IMC, 4); 22 | res.insert(MonitoringUnit::IRP, 4); 23 | res.insert(MonitoringUnit::PCU, 4); 24 | res.insert(MonitoringUnit::QPI, 4); 25 | res.insert(MonitoringUnit::R2PCIe, 4); 26 | res.insert(MonitoringUnit::R3QPI, 2); // According to the manual this is 3 but then it multiplexes... 27 | res.insert(MonitoringUnit::QPI, 4); // Not in the manual? 
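    // These per-unit counter limits are hard-coded rather than read from counters.toml; they
    // roughly match the EP-class server entries there, with R3QPI capped at 2 so the resulting
    // group never has to multiplex (see the comment above).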
28 | 29 | // Accuracy,Config,Error,Event,F1 score,Precision/Recall,Samples,Samples detail,Test App 30 | // Accuracy,Error,Event,F1 score,Precision,Recall,Samples Test 0,Samples Test 1,Samples Test Total,Samples Training 0,Samples Training 1,Samples Training Total,Tested Application,Training Configs 31 | type OutputRow = ( 32 | f64, 33 | String, 34 | String, 35 | f64, 36 | f64, 37 | f64, 38 | String, 39 | String, 40 | String, 41 | String, 42 | ); 43 | let mut rdr = csv::Reader::from_file(ranking_file) 44 | .unwrap() 45 | .has_headers(true); 46 | let mut events_added = HashMap::with_capacity(25); 47 | 48 | let mut group = PerfEventGroup::new(&res); 49 | 50 | for row in rdr.decode() { 51 | let (_, _, feature_name, _, _, _, _, _, _, _): OutputRow = row.unwrap(); 52 | // println!("{:?}", feature_name); 53 | let splits: Vec<&str> = feature_name.splitn(2, ".").collect(); 54 | let event_name = String::from(splits[1]); 55 | let feature_name = String::from(feature_name.clone()); 56 | 57 | let maybe_e: Option<&'static EventDescription> = core_counter.get(event_name.as_str()); 58 | 59 | // If we already measure the event, just return it (in case a feature shows up with AVG. and 60 | // STD.) 61 | if events_added.contains_key(&event_name) { 62 | println!("{}", feature_name); 63 | } else { 64 | // Otherwise, let's see if we can still add it to the group: 65 | match maybe_e { 66 | Some(event) => match group.add_event(PerfEvent(event)) { 67 | Ok(()) => { 68 | events_added.insert(event_name, true); 69 | println!("{}", feature_name); 70 | } 71 | Err(e) => info!( 72 | "Unable to add event: '{}' to {:?} because of '{}'", 73 | event_name, event.unit, e 74 | ), 75 | }, 76 | None => { 77 | let maybe_ue: Option<&'static EventDescription> = 78 | uncore_counter.get(event_name.as_str()); 79 | match maybe_ue { 80 | Some(uncore_event) => match group.add_event(PerfEvent(uncore_event)) { 81 | Ok(()) => { 82 | events_added.insert(event_name, true); 83 | println!("{}", feature_name); 84 | } 85 | Err(e) => info!( 86 | "Unable to add event: '{}' to {:?} because of '{}'", 87 | event_name, uncore_event.unit, e 88 | ), 89 | }, 90 | None => { 91 | // panic!("Didn't find event {} in data set?", event_name); 92 | } 93 | } 94 | } 95 | }; 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/pair.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::fs::File; 3 | use std::io; 4 | use std::io::prelude::*; 5 | use std::path::Path; 6 | use std::path::PathBuf; 7 | use std::process; 8 | use std::process::{Child, Command, Stdio}; 9 | use std::thread; 10 | 11 | use std::fmt; 12 | 13 | use itertools::{iproduct, Itertools}; 14 | use rustc_serialize::Encodable; 15 | use std::time::Duration; 16 | use wait_timeout::ChildExt; 17 | 18 | use log::*; 19 | use toml; 20 | 21 | use super::profile; 22 | use super::util::*; 23 | 24 | fn get_hostname() -> Option { 25 | use libc::gethostname; 26 | 27 | let mut buf: [i8; 64] = [0; 64]; 28 | let err = unsafe { gethostname(buf.as_mut_ptr(), buf.len()) }; 29 | 30 | if err != 0 { 31 | info!( 32 | "Can't read the hostname with gethostname: {}", 33 | io::Error::last_os_error() 34 | ); 35 | return None; 36 | } 37 | 38 | // find the first 0 byte (i.e. 
just after the data that gethostname wrote) 39 | let actual_len = buf.iter().position(|byte| *byte == 0).unwrap_or(buf.len()); 40 | let c_str: Vec = buf[..actual_len].into_iter().map(|i| *i as u8).collect(); 41 | 42 | Some(String::from_utf8(c_str).unwrap()) 43 | } 44 | 45 | #[derive(Debug, RustcEncodable)] 46 | struct Deployment<'a> { 47 | description: &'static str, 48 | a: Vec<&'a CpuInfo>, 49 | b: Vec<&'a CpuInfo>, 50 | mem: Vec, 51 | } 52 | 53 | impl<'a> Deployment<'a> { 54 | pub fn new( 55 | desc: &'static str, 56 | half_a: Vec<&'a CpuInfo>, 57 | half_b: Vec<&'a CpuInfo>, 58 | mem: Vec, 59 | ) -> Deployment<'a> { 60 | Deployment { 61 | description: desc, 62 | a: half_a, 63 | b: half_b, 64 | mem: mem, 65 | } 66 | } 67 | 68 | /// Split by just simply interleaving everything 69 | /// TODO: this only works because we make assumption on how CpuInfo is ordered.. 70 | pub fn split_interleaved( 71 | desc: &'static str, 72 | possible_groupings: Vec>, 73 | size: u64, 74 | ) -> Deployment<'a> { 75 | let mut cpus = possible_groupings.into_iter().last().unwrap(); 76 | 77 | let cpus_len = cpus.len(); 78 | assert!(cpus_len % 2 == 0); 79 | 80 | let upper_half = cpus.split_off(cpus_len / 2); 81 | let lower_half = cpus; 82 | 83 | let mut node: NodeInfo = lower_half[0].node; 84 | node.memory = size; 85 | 86 | Deployment::new(desc, lower_half, upper_half, vec![node]) 87 | } 88 | 89 | /// Split but makes sure a group shares the SMT threads 90 | pub fn split_smt_aware( 91 | desc: &'static str, 92 | possible_groupings: Vec>, 93 | size: u64, 94 | ) -> Deployment<'a> { 95 | let cpus = possible_groupings.into_iter().last().unwrap(); 96 | let cpus_len = cpus.len(); 97 | assert!(cpus_len % 2 == 0); 98 | 99 | let mut cores: Vec = cpus.iter().map(|c| c.core).collect(); 100 | assert!(cores.len() % 2 == 0); 101 | cores.sort(); 102 | cores.dedup(); 103 | 104 | let mut upper_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 105 | let mut lower_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 106 | 107 | for (i, core) in cores.into_iter().enumerate() { 108 | let cpus_on_core: Vec<&&CpuInfo> = cpus.iter().filter(|c| c.core == core).collect(); 109 | if i % 2 == 0 { 110 | lower_half.extend(cpus_on_core.into_iter()); 111 | } else { 112 | upper_half.extend(cpus_on_core.into_iter()); 113 | } 114 | } 115 | 116 | let mut node: NodeInfo = lower_half[0].node; 117 | node.memory = size; 118 | 119 | Deployment::new(desc, lower_half, upper_half, vec![node]) 120 | } 121 | 122 | /// Split but makes sure a group shares the SMT threads 123 | pub fn split_l3_aware( 124 | desc: &'static str, 125 | possible_groupings: Vec>, 126 | size: u64, 127 | ) -> Deployment<'a> { 128 | let cpus = possible_groupings.into_iter().last().unwrap(); 129 | let cpus_len = cpus.len(); 130 | assert!(cpus_len % 2 == 0); 131 | 132 | let mut l3s: Vec = cpus.iter().map(|c| c.l3).collect(); 133 | assert!(l3s.len() % 2 == 0); 134 | l3s.sort(); 135 | l3s.dedup(); 136 | 137 | let mut upper_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 138 | let mut lower_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 139 | 140 | for (i, l3) in l3s.into_iter().enumerate() { 141 | let cpus_on_l3: Vec<&&CpuInfo> = cpus.iter().filter(|c| c.l3 == l3).collect(); 142 | if i % 2 == 0 { 143 | upper_half.extend(cpus_on_l3.into_iter()); 144 | } else { 145 | lower_half.extend(cpus_on_l3.into_iter()); 146 | } 147 | } 148 | 149 | let mut node: NodeInfo = lower_half[0].node; 150 | node.memory = size; 151 | 152 | Deployment::new(desc, lower_half, upper_half, vec![node]) 
153 | } 154 | } 155 | 156 | impl<'a> fmt::Display for Deployment<'a> { 157 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 158 | let a: Vec = self.a.iter().map(|c| c.cpu).collect(); 159 | let b: Vec = self.b.iter().map(|c| c.cpu).collect(); 160 | 161 | write!(f, "Deployment Plan for {}:\n", self.description)?; 162 | write!(f, "-- Program A cores: {:?}\n", a)?; 163 | write!(f, "-- Program B cores: {:?}\n", b)?; 164 | write!(f, "-- Use memory:\n")?; 165 | for n in self.mem.iter() { 166 | write!(f, " - On node {}: {} Bytes\n", n.node, n.memory)?; 167 | } 168 | Ok(()) 169 | } 170 | } 171 | 172 | #[derive(Debug, RustcEncodable)] 173 | struct Program<'a> { 174 | name: String, 175 | manifest_path: &'a Path, 176 | binary: String, 177 | working_dir: String, 178 | args: Vec, 179 | env: Vec<(String, String)>, 180 | antagonist_args: Vec, 181 | breakpoints: Vec, 182 | checkpoints: Vec, 183 | is_openmp: bool, 184 | is_parsec: bool, 185 | use_watch_repeat: bool, 186 | alone: bool, 187 | } 188 | 189 | impl<'a> Program<'a> { 190 | fn from_toml( 191 | manifest_path: &'a Path, 192 | config: &toml::Table, 193 | alone_default: bool, 194 | ) -> Program<'a> { 195 | let name: String = config["name"] 196 | .as_str() 197 | .expect("program.name not a string") 198 | .to_string(); 199 | let binary: String = config["binary"] 200 | .as_str() 201 | .expect("program.binary not a string") 202 | .to_string(); 203 | 204 | let default_working_dir = String::from(manifest_path.to_str().unwrap()); 205 | let working_dir: String = config 206 | .get("working_dir") 207 | .map_or(default_working_dir.clone(), |v| { 208 | v.as_str() 209 | .expect("program.working_dir not a string") 210 | .to_string() 211 | }) 212 | .replace("$MANIFEST_DIR", default_working_dir.as_str()); 213 | 214 | let openmp: bool = config.get("openmp").map_or(false, |v| { 215 | v.as_bool().expect("'program.openmp' should be boolean") 216 | }); 217 | let parsec: bool = config.get("parsec").map_or(false, |v| { 218 | v.as_bool().expect("'program.parsec' should be boolean") 219 | }); 220 | let watch_repeat: bool = config.get("use_watch_repeat").map_or(false, |v| { 221 | v.as_bool() 222 | .expect("'program.use_watch_repeat' should be boolean") 223 | }); 224 | let alone: bool = config.get("alone").map_or(alone_default, |v| { 225 | v.as_bool().expect("'program.alone' should be boolean") 226 | }); 227 | let args: Vec = config["arguments"] 228 | .as_slice() 229 | .expect("program.arguments not an array?") 230 | .iter() 231 | .map(|s| { 232 | s.as_str() 233 | .expect("program1 argument not a string?") 234 | .to_string() 235 | }) 236 | .collect(); 237 | let antagonist_args: Vec = 238 | config 239 | .get("antagonist_arguments") 240 | .map_or(args.clone(), |v| { 241 | v.as_slice() 242 | .expect("program.antagonist_arguments not an array?") 243 | .iter() 244 | .map(|s| { 245 | s.as_str() 246 | .expect("program2 argument not a string?") 247 | .to_string() 248 | }) 249 | .collect() 250 | }); 251 | let env: Vec<(String, String)> = config.get("env").map_or(Vec::new(), |t| { 252 | t.as_table() 253 | .expect("program.env not a table?") 254 | .iter() 255 | .map(|(k, v)| { 256 | ( 257 | k.as_str().to_string(), 258 | v.as_str() 259 | .expect("env value needs to be a string") 260 | .to_string(), 261 | ) 262 | }) 263 | .collect() 264 | }); 265 | 266 | let breakpoints: Vec = config.get("breakpoints").map_or(Vec::new(), |bs| { 267 | bs.as_slice() 268 | .expect("program.breakpoints not an array?") 269 | .iter() 270 | .map(|s| { 271 | s.as_str() 272 | .expect("program breakpoint not a 
string?") 273 | .to_string() 274 | }) 275 | .collect() 276 | }); 277 | // TODO: this is currently not in use (remove?) 278 | let checkpoints: Vec = config.get("checkpoints").map_or(Vec::new(), |cs| { 279 | cs.as_slice() 280 | .expect("program.checkpoints not an array?") 281 | .iter() 282 | .map(|s| { 283 | s.as_str() 284 | .expect("program checkpoint not a string?") 285 | .to_string() 286 | }) 287 | .collect() 288 | }); 289 | 290 | Program { 291 | name: name, 292 | manifest_path: manifest_path, 293 | binary: binary, 294 | is_openmp: openmp, 295 | is_parsec: parsec, 296 | env: env, 297 | alone: alone, 298 | working_dir: working_dir, 299 | use_watch_repeat: watch_repeat, 300 | args: args, 301 | antagonist_args: antagonist_args, 302 | breakpoints: breakpoints, 303 | // TODO: this is currently not in use (remove?) 304 | checkpoints: checkpoints, 305 | } 306 | } 307 | 308 | fn get_cmd(&self, antagonist: bool, cores: &Vec<&CpuInfo>) -> Vec { 309 | let nthreads = cores.len(); 310 | let mut cmd = vec![&self.binary]; 311 | 312 | if !antagonist { 313 | cmd.extend(self.args.iter()); 314 | } else { 315 | cmd.extend(self.antagonist_args.iter()); 316 | } 317 | 318 | cmd.iter() 319 | .map(|s| s.replace("$NUM_THREADS", format!("{}", nthreads).as_str())) 320 | .map(|s| { 321 | s.replace( 322 | "$MANIFEST_DIR", 323 | format!("{}", self.manifest_path.to_str().unwrap()).as_str(), 324 | ) 325 | }) 326 | .collect() 327 | } 328 | 329 | fn get_env(&self, antagonist: bool, cores: &Vec<&CpuInfo>) -> Vec<(String, String)> { 330 | let mut env: Vec<(String, String)> = Vec::with_capacity(2); 331 | let cpus: Vec = cores.iter().map(|c| format!("{}", c.cpu)).collect(); 332 | // TODO: remove this feature: 333 | if self.is_openmp { 334 | env.push((String::from("OMP_PROC_BIND"), String::from("true"))); 335 | env.push(( 336 | String::from("OMP_PLACES"), 337 | format!("{{{}}}", cpus.join(",")), 338 | )); 339 | } 340 | // TODO: remove this feature: 341 | else if self.is_parsec { 342 | assert!(!self.is_openmp); 343 | env.push(( 344 | String::from("LD_PRELOAD"), 345 | format!( 346 | "{}/bin/libhooks.so.0.0.0", 347 | self.manifest_path.to_str().unwrap() 348 | ), 349 | )); 350 | env.push((String::from("PARSEC_CPU_NUM"), format!("{}", cpus.len()))); 351 | env.push(( 352 | String::from("PARSEC_CPU_BASE"), 353 | format!("{}", cpus.join(",")), 354 | )); 355 | if antagonist { 356 | env.push((String::from("PARSEC_REPEAT"), String::from("1"))); 357 | } 358 | } 359 | 360 | // keep this one: 361 | for (k, v) in self.env.clone() { 362 | env.push((k, v)); 363 | } 364 | 365 | env 366 | } 367 | } 368 | 369 | #[derive(RustcEncodable)] 370 | struct Run<'a> { 371 | manifest_path: &'a Path, 372 | output_path: PathBuf, 373 | a: &'a Program<'a>, 374 | b: Option<&'a Program<'a>>, 375 | deployment: &'a Deployment<'a>, 376 | } 377 | 378 | impl<'a> Run<'a> { 379 | fn new( 380 | manifest_path: &'a Path, 381 | output_path: &'a Path, 382 | a: &'a Program<'a>, 383 | b: Option<&'a Program<'a>>, 384 | deployment: &'a Deployment, 385 | ) -> Run<'a> { 386 | let mut out_dir = output_path.to_path_buf(); 387 | out_dir.push(deployment.description); 388 | mkdir(&out_dir); 389 | match b { 390 | Some(p) => out_dir.push(format!("{}_vs_{}", a.name, p.name)), 391 | None => out_dir.push(a.name.as_str()), 392 | } 393 | 394 | Run { 395 | manifest_path: manifest_path, 396 | output_path: out_dir, 397 | a: a, 398 | b: b, 399 | deployment: deployment, 400 | } 401 | } 402 | 403 | fn profile_a(&self) -> io::Result<()> { 404 | let cmd = self.a.get_cmd(false, &self.deployment.a); 
405 | let env = self.a.get_env(false, &self.deployment.a); 406 | let mut bps: Vec = self.a.breakpoints.iter().map(|s| s.to_string()).collect(); 407 | // TODO: this is currently not in use (remove?) 408 | bps.extend(self.a.checkpoints.iter().map(|s| s.to_string())); 409 | // let cps = self.a.checkpoints.iter().map(|s| s.to_string()).collect(); 410 | 411 | debug!( 412 | "Spawning {:?} with environment {:?} breakpoints {:?}", 413 | cmd, env, bps 414 | ); 415 | profile::profile( 416 | &self.output_path, 417 | self.a.working_dir.as_str(), 418 | cmd, 419 | env, 420 | bps, 421 | false, 422 | None, 423 | false, 424 | ); 425 | Ok(()) 426 | } 427 | 428 | fn start_b(&mut self) -> Option { 429 | self.b.map(|b| { 430 | let mut command_args = b.get_cmd(true, &self.deployment.b); 431 | let env = b.get_env(true, &self.deployment.b); 432 | if b.use_watch_repeat { 433 | command_args.insert(0, String::from("-t")); 434 | command_args.insert(0, String::from("-n0")); 435 | command_args.insert(0, String::from("watch")); 436 | } 437 | 438 | debug!("Spawning {:?} with environment {:?}", command_args, env); 439 | debug!("Working dir for B is: {}", b.working_dir.as_str()); 440 | 441 | let mut cmd = Command::new(&command_args[0]); 442 | let cmd = cmd 443 | .stdout(Stdio::piped()) 444 | .current_dir(b.working_dir.as_str()) 445 | .stderr(Stdio::piped()) 446 | .args(&command_args[1..]); 447 | 448 | // Add the environment: 449 | for (key, value) in env { 450 | cmd.env(key, value); 451 | } 452 | 453 | match cmd.spawn() { 454 | Ok(child) => child, 455 | Err(_) => panic!("Can't spawn program B"), 456 | } 457 | }) 458 | } 459 | 460 | fn save_output(&self, filename: &str, what: &mut T) -> io::Result<()> { 461 | let mut stdout = String::new(); 462 | what.read_to_string(&mut stdout)?; 463 | let mut stdout_path = self.output_path.clone(); 464 | stdout_path.push(filename); 465 | let mut f = File::create(stdout_path.as_path())?; 466 | f.write_all(stdout.as_bytes()) 467 | } 468 | 469 | fn save_run_information(&self) -> io::Result<()> { 470 | let mut run_toml_path = self.output_path.clone(); 471 | run_toml_path.push("run.toml"); 472 | let mut f = File::create(run_toml_path.as_path())?; 473 | let mut e = toml::Encoder::new(); 474 | self.encode(&mut e).unwrap(); 475 | f.write_all(toml::encode_str(&e.toml).as_bytes())?; 476 | 477 | let mut run_txt_path = self.output_path.clone(); 478 | run_txt_path.push("run.txt"); 479 | let mut f = File::create(run_txt_path.as_path())?; 480 | f.write_all(format!("{}", self).as_bytes()) 481 | } 482 | 483 | fn is_completed(&self) -> bool { 484 | // Is this run already done (in case we restart): 485 | let mut completed_file: PathBuf = self.output_path.to_path_buf(); 486 | completed_file.push("completed"); 487 | if completed_file.exists() { 488 | true 489 | } else { 490 | false 491 | } 492 | } 493 | 494 | fn profile(&mut self) -> io::Result<()> { 495 | mkdir(&self.output_path); 496 | if self.is_completed() { 497 | warn!( 498 | "Run {} already completed, skipping.", 499 | self.output_path.to_string_lossy() 500 | ); 501 | return Ok(()); 502 | } 503 | 504 | self.save_run_information()?; 505 | 506 | // Profile together with B 507 | let maybe_app_b: Option = self.start_b(); 508 | if maybe_app_b.is_some() { 509 | debug!("Wait for B to warmup before starting to profile A"); 510 | let one_min = Duration::from_millis(60000); 511 | thread::sleep(one_min); 512 | } 513 | 514 | self.profile_a()?; 515 | 516 | match maybe_app_b { 517 | Some(mut app_b) => { 518 | match 
app_b.wait_timeout(Duration::from_millis(200)).unwrap() { 519 | Some(status) => { 520 | // The Application B has already exited, this means it probably crashed 521 | // while we were profiling (bad). We can't use these results. 522 | app_b 523 | .stdout 524 | .map(|mut c| self.save_output("B_stdout.txt", &mut c)); 525 | app_b 526 | .stderr 527 | .map(|mut c| self.save_output("B_stderr.txt", &mut c)); 528 | 529 | let mut completed_path = self.output_path.clone(); 530 | completed_path.push("completed"); 531 | fs::remove_file(completed_path)?; 532 | 533 | panic!( 534 | "B has crashed during measurements {:?}. This is bad.", 535 | status.code() 536 | ); 537 | // TODO: save error code and continue (?) 538 | } 539 | None => { 540 | app_b.kill()?; 541 | app_b.wait()?; 542 | app_b 543 | .stdout 544 | .map(|mut c| self.save_output("B_stdout.txt", &mut c)); 545 | app_b 546 | .stderr 547 | .map(|mut c| self.save_output("B_stderr.txt", &mut c)); 548 | } 549 | } 550 | } 551 | None => {} 552 | }; 553 | 554 | Ok(()) 555 | } 556 | } 557 | 558 | impl<'a> fmt::Display for Run<'a> { 559 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 560 | write!( 561 | f, 562 | "A: ENV = {:?} CMD = {:?}\n", 563 | self.a.get_env(false, &self.deployment.a), 564 | self.a.get_cmd(false, &self.deployment.a) 565 | )?; 566 | write!(f, "A Breakpoints: {:?}\n", self.a.breakpoints)?; 567 | write!(f, "A Checkpoints: {:?}\n", self.a.checkpoints)?; 568 | match self.b { 569 | Some(b) => { 570 | write!( 571 | f, 572 | "B: {:?} {:?}\n", 573 | b.get_env(true, &self.deployment.b), 574 | b.get_cmd(true, &self.deployment.b) 575 | )?; 576 | write!(f, "{}", &self.deployment)?; 577 | } 578 | None => { 579 | write!(f, "No other program running.")?; 580 | } 581 | } 582 | Ok(()) 583 | } 584 | } 585 | 586 | pub fn pair(manifest_folder: &Path, dryrun: bool, start: usize, stepping: usize) { 587 | let canonical_manifest_path = 588 | fs::canonicalize(&manifest_folder).expect("canonicalize manifest path does not work"); 589 | 590 | let mut out_dir = canonical_manifest_path.to_path_buf(); 591 | let hostname = get_hostname().unwrap_or(String::from("unknown")); 592 | out_dir.push(hostname); 593 | mkdir(&out_dir); 594 | 595 | let mt = MachineTopology::new(); 596 | 597 | let mut manifest: PathBuf = canonical_manifest_path.to_path_buf(); 598 | manifest.push("manifest.toml"); 599 | let mut file = File::open(manifest.as_path()).expect("manifest.toml file does not exist?"); 600 | let mut manifest_string = String::new(); 601 | let _ = file.read_to_string(&mut manifest_string).unwrap(); 602 | let mut parser = toml::Parser::new(manifest_string.as_str()); 603 | let doc = match parser.parse() { 604 | Some(doc) => doc, 605 | None => { 606 | error!("Can't parse the manifest file:\n{:?}", parser.errors); 607 | process::exit(1); 608 | } 609 | }; 610 | let experiment: &toml::Table = doc["experiment"] 611 | .as_table() 612 | .expect("Error in manifest.toml: 'experiment' should be a table."); 613 | let configuration: &[toml::Value] = experiment["configurations"] 614 | .as_slice() 615 | .expect("Error in manifest.toml: 'configuration' attribute should be a list."); 616 | let configs: Vec = configuration 617 | .iter() 618 | .map(|s| { 619 | s.as_str() 620 | .expect("configuration elements should be strings") 621 | .to_string() 622 | }) 623 | .collect(); 624 | let run_alone: bool = experiment 625 | .get("alone") 626 | .map_or(true, |v| v.as_bool().expect("'alone' should be boolean")); 627 | let profile_only: Option> = experiment.get("profile_only_a").map(|progs| { 628 | 
progs 629 | .as_slice() 630 | .expect("Error in manifest.toml: 'profile_only_a' should be a list.") 631 | .into_iter() 632 | .map(|p| { 633 | p.as_str() 634 | .expect("profile_only_a elements should name programs (strings)") 635 | .to_string() 636 | }) 637 | .collect() 638 | }); 639 | let profile_only_b: Option> = experiment.get("profile_only_b").map(|progs| { 640 | progs 641 | .as_slice() 642 | .expect("Error in manifest.toml: 'profile_only_b' should be a list.") 643 | .into_iter() 644 | .map(|p| { 645 | p.as_str() 646 | .expect("profile_only_b elements should name programs (strings)") 647 | .to_string() 648 | }) 649 | .collect() 650 | }); 651 | 652 | let mut programs: Vec = Vec::with_capacity(2); 653 | for (key, _value) in &doc { 654 | if key.starts_with("program") { 655 | let program_desc: &toml::Table = doc[key] 656 | .as_table() 657 | .expect("Error in manifest.toml: 'program' should be a table."); 658 | programs.push(Program::from_toml( 659 | &canonical_manifest_path, 660 | program_desc, 661 | run_alone, 662 | )); 663 | } 664 | } 665 | 666 | let mut deployments: Vec = Vec::with_capacity(4); 667 | for config in configs { 668 | match config.as_str() { 669 | "L1-SMT" => deployments.push(Deployment::split_interleaved( 670 | "L1-SMT", 671 | mt.same_l1(), 672 | mt.l1_size().unwrap_or(0), 673 | )), 674 | "L3-SMT" => deployments.push(Deployment::split_interleaved( 675 | "L3-SMT", 676 | mt.same_l3(), 677 | mt.l3_size().unwrap_or(0), 678 | )), 679 | "L3-SMT-cores" => deployments.push(Deployment::split_smt_aware( 680 | "L3-SMT-cores", 681 | mt.same_l3(), 682 | mt.l3_size().unwrap_or(0), 683 | )), 684 | "L3-cores" => deployments.push(Deployment::split_smt_aware( 685 | "L3-cores", 686 | mt.same_l3_cores(), 687 | mt.l3_size().unwrap_or(0), 688 | )), 689 | "Full-L3" => deployments.push(Deployment::split_l3_aware( 690 | "Full-L3", 691 | mt.whole_machine_cores(), 692 | mt.l3_size().unwrap_or(0), 693 | )), 694 | "Full-SMT-L3" => deployments.push(Deployment::split_l3_aware( 695 | "Full-SMT-L3", 696 | mt.whole_machine(), 697 | mt.l3_size().unwrap_or(0), 698 | )), 699 | "Full-cores" => deployments.push(Deployment::split_interleaved( 700 | "Full-cores", 701 | mt.whole_machine_cores(), 702 | mt.l3_size().unwrap_or(0), 703 | )), 704 | "Full-SMT-cores" => deployments.push(Deployment::split_smt_aware( 705 | "Full-SMT-cores", 706 | mt.whole_machine(), 707 | mt.l3_size().unwrap_or(0), 708 | )), 709 | 710 | _ => error!("Ignored unknown deployment config '{}'.", config), 711 | }; 712 | } 713 | 714 | // Add all possible pairs: 715 | let mut pairs: Vec<(&Program, Option<&Program>)> = Vec::new(); 716 | for p in programs.iter() { 717 | pairs.push((p, None)); 718 | } 719 | for (a, b) in iproduct!(programs.iter(), programs.iter()) { 720 | pairs.push((a, Some(b))); 721 | } 722 | 723 | // Filter out the pairs we do not want to execute: 724 | let mut runs: Vec = Vec::new(); 725 | for (a, b) in pairs.into_iter() { 726 | let profile_a = profile_only 727 | .as_ref() 728 | .map_or(true, |ps| ps.contains(&a.name)); 729 | let profile_b = !b.is_none() 730 | && profile_only_b 731 | .as_ref() 732 | .map_or(profile_a, |ps| ps.contains(&b.unwrap().name)); 733 | if !profile_a && !profile_b { 734 | continue; 735 | } 736 | 737 | for d in deployments.iter() { 738 | if b.is_none() && (!run_alone || !a.alone) { 739 | continue; 740 | } 741 | runs.push(Run::new( 742 | &canonical_manifest_path, 743 | out_dir.as_path(), 744 | a, 745 | b, 746 | d, 747 | )); 748 | } 749 | } 750 | 751 | // Finally, profile the runs we are supposed to execute 
based on the command line args 752 | let mut i = 0; 753 | for run in runs.iter_mut().skip(start).step(stepping) { 754 | if !dryrun { 755 | run.profile().ok(); 756 | } else { 757 | warn!("Dryrun mode: {}", run); 758 | } 759 | i += 1; 760 | } 761 | 762 | info!("{} runs completed.", i); 763 | } 764 | -------------------------------------------------------------------------------- /src/profile.rs: -------------------------------------------------------------------------------- 1 | use std; 2 | use std::collections::HashMap; 3 | 4 | use csv; 5 | use lazy_static::lazy_static; 6 | use pbr::ProgressBar; 7 | use std::error; 8 | use std::error::Error; 9 | use std::fmt; 10 | use std::fs; 11 | use std::fs::File; 12 | use std::io::prelude::*; 13 | use std::path::Path; 14 | use std::path::PathBuf; 15 | use std::process::Command; 16 | use std::str::FromStr; 17 | use x86::cpuid; 18 | use x86::perfcnt::intel::{events, Counter, EventDescription, MSRIndex, PebsType, Tuple}; 19 | 20 | use super::util::*; 21 | use log::*; 22 | 23 | lazy_static! { 24 | 25 | /// Check if HT is enabled on this CPU (if HT is disabled it doubles the amount of available 26 | /// performance counters on a core). 27 | static ref HT_AVAILABLE: bool = { 28 | let cpuid = cpuid::CpuId::new(); 29 | cpuid.get_extended_topology_info().unwrap().any(|t| { 30 | t.level_type() == cpuid::TopologyType::SMT 31 | }) 32 | }; 33 | 34 | /// For every MonitoringUnit try to figure out how many counters we support. 35 | /// This is handled through a config file since Linux doesn't export this information in 36 | /// it's PMU devices (but probably should)... 37 | static ref PMU_COUNTERS: HashMap = { 38 | let cpuid = cpuid::CpuId::new(); 39 | let cpu_counter = cpuid.get_performance_monitoring_info().map_or(0, |info| info.number_of_counters()) as usize; 40 | let mut res = HashMap::with_capacity(11); 41 | res.insert(MonitoringUnit::CPU, cpu_counter); 42 | let (family, model) = cpuid.get_feature_info().map_or((0,0), |fi| (fi.family_id(), ((fi.extended_model_id() as u8) << 4) | fi.model_id() as u8)); 43 | 44 | let ctr_config = include_str!("counters.toml"); 45 | let mut parser = toml::Parser::new(ctr_config); 46 | 47 | let doc = match parser.parse() { 48 | Some(doc) => doc, 49 | None => { 50 | error!("Can't parse the counter configuration file:\n{:?}", parser.errors); 51 | std::process::exit(9); 52 | } 53 | }; 54 | 55 | trace!("Trying to find architecture for family = {:#x} model = {:#x}", family, model); 56 | let mut found: bool = false; 57 | for (name, architecture) in doc { 58 | let architecture = architecture.as_table().expect("counters.toml architectures must be a table"); 59 | let cfamily = &architecture["family"]; 60 | for cmodel in architecture["models"].as_slice().expect("counters.toml models must be a list.") { 61 | let cfamily = cfamily.as_integer().expect("Family must be int.") as u8; 62 | let cmodel = cmodel.as_integer().expect("Model must be int.") as u8; 63 | if family == cfamily && model == cmodel { 64 | trace!("Running on {}, reading MonitoringUnit limits from config", name); 65 | found = true; 66 | 67 | // TODO: We should ideally get both, prgrammable and fixed counters: 68 | for (unit, limit) in architecture["programmable_counters"].as_table().expect("programmable_counters must be a table") { 69 | let unit = MonitoringUnit::new(unit.as_str()); 70 | let limit = limit.as_integer().expect("Counter limit should be an integer"); 71 | res.insert(unit, limit as usize); 72 | } 73 | } 74 | } 75 | } 76 | 77 | if !found { 78 | warn!("Didn't recogize 
this architecture so we can't infer #counters for MonitoringUnit (Please update counters.toml for family = {:#x} model = {:#x})", family, model); 79 | res.insert(MonitoringUnit::UBox, 4); 80 | res.insert(MonitoringUnit::HA, 4); 81 | res.insert(MonitoringUnit::IRP, 4); 82 | res.insert(MonitoringUnit::PCU, 4); 83 | res.insert(MonitoringUnit::R2PCIe, 4); 84 | res.insert(MonitoringUnit::R3QPI, 4); 85 | res.insert(MonitoringUnit::QPI, 4); 86 | res.insert(MonitoringUnit::CBox, 2); 87 | res.insert(MonitoringUnit::IMC, 4); 88 | res.insert(MonitoringUnit::Arb, 2); 89 | res.insert(MonitoringUnit::M2M, 4); 90 | res.insert(MonitoringUnit::CHA, 4); 91 | res.insert(MonitoringUnit::M3UPI, 4); 92 | res.insert(MonitoringUnit::IIO, 4); 93 | res.insert(MonitoringUnit::UPI_LL, 4); 94 | } 95 | 96 | res 97 | }; 98 | 99 | /// Find the linux PMU devices that we need to program through perf 100 | static ref PMU_DEVICES: Vec = { 101 | let paths = fs::read_dir("/sys/bus/event_source/devices/").expect("Can't read devices directory."); 102 | let mut devices = Vec::with_capacity(15); 103 | for p in paths { 104 | let path = p.expect("Is not a path."); 105 | let file_name = path.file_name().into_string().expect("Is valid UTF-8 string."); 106 | devices.push(file_name); 107 | } 108 | 109 | devices 110 | }; 111 | 112 | /// Bogus or clocks that we don't want to measure or tend to break things 113 | static ref IGNORE_EVENTS: HashMap<&'static str, bool> = { 114 | let mut ignored = HashMap::with_capacity(1); 115 | ignored.insert("UNC_CLOCK.SOCKET", true); // Just says 'fixed' and does not name which counter :/ 116 | ignored.insert("UNC_M_CLOCKTICKS_F", true); 117 | ignored.insert("UNC_U_CLOCKTICKS", true); 118 | ignored 119 | }; 120 | 121 | /// Which events should be measured in isolation on this architecture. 122 | static ref ISOLATE_EVENTS: Vec<&'static str> = { 123 | let cpuid = cpuid::CpuId::new(); 124 | let (family, model) = cpuid.get_feature_info().map_or((0,0), |fi| (fi.family_id(), ((fi.extended_model_id() as u8) << 4) | fi.model_id() as u8)); 125 | 126 | // Sometimes the perfmon data is missing the errata information 127 | // as is the case for IvyBridge where MEM_LOAD* things can't be measured 128 | // together with other things. 
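        // (Family 0x6 with model 58 or 62 corresponds to the `ivybridge` and `ivybridgeep`
        // entries in counters.toml, i.e. client Ivy Bridge and Ivy Bridge-EP.)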
129 | if family == 0x6 && (model == 62 || model == 58) { 130 | vec![ "MEM_UOPS_RETIRED.ALL_STORES", 131 | "MEM_LOAD_UOPS_RETIRED.L1_MISS", 132 | "MEM_LOAD_UOPS_RETIRED.HIT_LFB", 133 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", 134 | "MEM_LOAD_UOPS_RETIRED.L2_HIT", 135 | "MEM_UOPS_RETIRED.SPLIT_LOADS", 136 | "MEM_UOPS_RETIRED.ALL_LOADS", 137 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM", 138 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", 139 | "MEM_LOAD_UOPS_RETIRED.L1_HIT", 140 | "MEM_UOPS_RETIRED.STLB_MISS_STORES", 141 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", 142 | "MEM_LOAD_UOPS_RETIRED.LLC_MISS", 143 | "MEM_LOAD_UOPS_RETIRED.L2_MISS", 144 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", 145 | "MEM_UOPS_RETIRED.STLB_MISS_LOADS", 146 | "MEM_UOPS_RETIRED.LOCK_LOADS", 147 | "MEM_LOAD_UOPS_RETIRED.LLC_HIT", 148 | "MEM_UOPS_RETIRED.SPLIT_STORES", 149 | // Those are IvyBridge-EP events: 150 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM", 151 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM", 152 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD"] 153 | } 154 | else { 155 | vec![] 156 | } 157 | }; 158 | } 159 | 160 | fn execute_perf( 161 | perf: &mut Command, 162 | cmd: &Vec, 163 | counters: &Vec, 164 | datafile: &Path, 165 | dryrun: bool, 166 | ) -> (String, String, String) { 167 | assert!(cmd.len() >= 1); 168 | let perf = perf.arg("-o").arg(datafile.as_os_str()); 169 | let events: Vec = counters.iter().map(|c| format!("-e {}", c)).collect(); 170 | 171 | let perf = perf.args(events.as_slice()); 172 | let perf = perf.args(cmd.as_slice()); 173 | let perf_cmd_str: String = format!("{:?}", perf).replace("\"", ""); 174 | 175 | let (stdout, stderr) = if !dryrun { 176 | match perf.output() { 177 | Ok(out) => { 178 | let stdout = 179 | String::from_utf8(out.stdout).unwrap_or(String::from("Unable to read stdout!")); 180 | let stderr = 181 | String::from_utf8(out.stderr).unwrap_or(String::from("Unable to read stderr!")); 182 | 183 | if out.status.success() { 184 | trace!("stdout:\n{:?}", stdout); 185 | trace!("stderr:\n{:?}", stderr); 186 | } else if !out.status.success() { 187 | error!( 188 | "perf command: {} got unknown exit status was: {}", 189 | perf_cmd_str, out.status 190 | ); 191 | debug!("stdout:\n{}", stdout); 192 | debug!("stderr:\n{}", stderr); 193 | } 194 | 195 | if !datafile.exists() { 196 | error!( 197 | "perf command: {} succeeded but did not produce the required file {:?} \ 198 | (you should file a bug report!)", 199 | perf_cmd_str, datafile 200 | ); 201 | } 202 | 203 | (stdout, stderr) 204 | } 205 | Err(err) => { 206 | error!("Executing {} failed : {}", perf_cmd_str, err); 207 | (String::new(), String::new()) 208 | } 209 | } 210 | } else { 211 | warn!("Dry run mode -- would execute: {}", perf_cmd_str); 212 | (String::new(), String::new()) 213 | }; 214 | 215 | (perf_cmd_str, stdout, stderr) 216 | } 217 | 218 | pub fn create_out_directory(out_dir: &Path) { 219 | if !out_dir.exists() { 220 | std::fs::create_dir(out_dir).expect("Can't create `out` directory"); 221 | } 222 | } 223 | 224 | pub fn get_known_events<'a>() -> Vec<&'a EventDescription<'static>> { 225 | events() 226 | .expect("No performance events found?") 227 | .values() 228 | .collect() 229 | } 230 | 231 | #[allow(non_camel_case_types)] 232 | #[derive(Hash, Eq, PartialEq, Debug, Copy, Clone, PartialOrd, Ord)] 233 | pub enum MonitoringUnit { 234 | /// Devices 235 | CPU, 236 | /// Memory stuff 237 | Arb, 238 | /// The CBox manages the interface between the core and the LLC, so 239 | /// the instances of uncore CBox is equal to number of 
cores 240 | CBox, 241 | /// ??? 242 | SBox, 243 | /// ??? 244 | UBox, 245 | /// QPI Stuff 246 | QPI, 247 | /// Ring to QPI 248 | R3QPI, 249 | /// IIO Coherency 250 | IRP, 251 | /// Ring to PCIe 252 | R2PCIe, 253 | /// Memory Controller 254 | IMC, 255 | /// Home Agent 256 | HA, 257 | /// Power Control Unit 258 | PCU, 259 | /// XXX 260 | M2M, 261 | /// XXX 262 | CHA, 263 | /// XXX 264 | M3UPI, 265 | /// XXX 266 | IIO, 267 | /// XXX 268 | UPI_LL, 269 | /// Types we don't know how to handle... 270 | Unknown, 271 | } 272 | 273 | impl fmt::Display for MonitoringUnit { 274 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 275 | match *self { 276 | MonitoringUnit::CPU => write!(f, "CPU"), 277 | MonitoringUnit::Arb => write!(f, "Arb"), 278 | MonitoringUnit::CBox => write!(f, "CBox"), 279 | MonitoringUnit::SBox => write!(f, "SBox"), 280 | MonitoringUnit::UBox => write!(f, "UBox"), 281 | MonitoringUnit::QPI => write!(f, "QPI"), 282 | MonitoringUnit::R3QPI => write!(f, "R3QPI"), 283 | MonitoringUnit::IRP => write!(f, "IRP"), 284 | MonitoringUnit::R2PCIe => write!(f, "R2PCIe"), 285 | MonitoringUnit::IMC => write!(f, "IMC"), 286 | MonitoringUnit::HA => write!(f, "HA"), 287 | MonitoringUnit::PCU => write!(f, "PCU"), 288 | MonitoringUnit::M2M => write!(f, "M2M"), 289 | MonitoringUnit::CHA => write!(f, "CHA"), 290 | MonitoringUnit::M3UPI => write!(f, "M3UPI"), 291 | MonitoringUnit::IIO => write!(f, "IIO"), 292 | MonitoringUnit::UPI_LL => write!(f, "UPI LL"), 293 | MonitoringUnit::Unknown => write!(f, "Unknown"), 294 | } 295 | } 296 | } 297 | 298 | impl MonitoringUnit { 299 | fn new<'a>(unit: &'a str) -> MonitoringUnit { 300 | match unit.to_lowercase().as_str() { 301 | "cpu" => MonitoringUnit::CPU, 302 | "cbo" => MonitoringUnit::CBox, 303 | "qpi_ll" => MonitoringUnit::QPI, 304 | "sbo" => MonitoringUnit::SBox, 305 | "imph-u" => MonitoringUnit::Arb, 306 | "arb" => MonitoringUnit::Arb, 307 | "r3qpi" => MonitoringUnit::R3QPI, 308 | "qpi ll" => MonitoringUnit::QPI, 309 | "irp" => MonitoringUnit::IRP, 310 | "r2pcie" => MonitoringUnit::R2PCIe, 311 | "imc" => MonitoringUnit::IMC, 312 | "ha" => MonitoringUnit::HA, 313 | "pcu" => MonitoringUnit::PCU, 314 | "ubox" => MonitoringUnit::UBox, 315 | "m2m" => MonitoringUnit::M2M, 316 | "cha" => MonitoringUnit::CHA, 317 | "m3upi" => MonitoringUnit::M3UPI, 318 | "iio" => MonitoringUnit::IIO, 319 | "upi ll" => MonitoringUnit::UPI_LL, 320 | "upi" => MonitoringUnit::UPI_LL, 321 | "ubo" => MonitoringUnit::UBox, 322 | "qpi" => MonitoringUnit::QPI, 323 | _ => { 324 | error!("Don't support MonitoringUnit {}", unit); 325 | MonitoringUnit::Unknown 326 | } 327 | } 328 | } 329 | 330 | pub fn to_intel_event_description(&self) -> Option<&'static str> { 331 | match *self { 332 | MonitoringUnit::CPU => None, 333 | MonitoringUnit::CBox => Some("CBO"), 334 | MonitoringUnit::QPI => Some("QPI_LL"), 335 | MonitoringUnit::SBox => Some("SBO"), 336 | MonitoringUnit::Arb => Some("ARB"), 337 | MonitoringUnit::R3QPI => Some("R3QPI"), 338 | MonitoringUnit::IRP => Some("IRP"), 339 | MonitoringUnit::R2PCIe => Some("R2PCIE"), 340 | MonitoringUnit::IMC => Some("IMC"), 341 | MonitoringUnit::HA => Some("HA"), 342 | MonitoringUnit::PCU => Some("PCU"), 343 | MonitoringUnit::UBox => Some("UBOX"), 344 | MonitoringUnit::M2M => Some("M2M"), 345 | MonitoringUnit::CHA => Some("CHA"), 346 | MonitoringUnit::M3UPI => Some("M3UPI"), 347 | MonitoringUnit::IIO => Some("IIO"), 348 | MonitoringUnit::UPI_LL => Some("UPI LL"), 349 | MonitoringUnit::Unknown => None, 350 | } 351 | } 352 | 353 | /// Return the perf prefix 
for selecting the right PMU unit in case of uncore counters. 354 | pub fn to_perf_prefix(&self) -> Option<&'static str> { 355 | let res = match *self { 356 | MonitoringUnit::CPU => Some("cpu"), 357 | MonitoringUnit::CBox => Some("uncore_cbox"), 358 | MonitoringUnit::QPI => Some("uncore_qpi"), 359 | MonitoringUnit::SBox => Some("uncore_sbox"), 360 | MonitoringUnit::Arb => Some("uncore_arb"), 361 | MonitoringUnit::R3QPI => Some("uncore_r3qpi"), // Adds postfix value 362 | MonitoringUnit::IRP => Some("uncore_irp"), // According to libpfm4 (lib/pfmlib_intel_ivbep_unc_irp.c) 363 | MonitoringUnit::R2PCIe => Some("uncore_r2pcie"), 364 | MonitoringUnit::IMC => Some("uncore_imc"), // Adds postfix value 365 | MonitoringUnit::HA => Some("uncore_ha"), // Adds postfix value 366 | MonitoringUnit::PCU => Some("uncore_pcu"), 367 | MonitoringUnit::UBox => Some("uncore_ubox"), 368 | MonitoringUnit::M2M => Some("uncore_m2m"), // Adds postfix value 369 | MonitoringUnit::CHA => Some("uncore_cha"), // Adds postfix value 370 | MonitoringUnit::M3UPI => Some("uncore_m3upi"), // Adds postfix value 371 | MonitoringUnit::IIO => Some("uncore_iio"), // Adds postfix value 372 | MonitoringUnit::UPI_LL => Some("uncore_upi"), // Adds postfix value 373 | MonitoringUnit::Unknown => None, 374 | }; 375 | 376 | // Note: If anything here does not return uncore_ as a prefix, you need to update extract.rs! 377 | res.map(|string| assert!(string.starts_with("uncore_") || string.starts_with("cpu"))); 378 | 379 | res 380 | } 381 | } 382 | 383 | #[derive(Debug)] 384 | pub struct PerfEvent<'a, 'b>(pub &'a EventDescription<'b>) 385 | where 386 | 'b: 'a; 387 | 388 | impl<'a, 'b> PerfEvent<'a, 'b> { 389 | /// Returns all possible configurations of the event. 390 | /// This is a two vector tuple containing devices and configs: 391 | /// 392 | /// * Devices are a subset of the ones listed in `/sys/bus/event_source/devices/` 393 | /// Usually just `cpu` but uncore events can be measured on multiple devices. 394 | /// * Configs are all possible combinations of attributes for this event. 395 | /// Usually one but offcore events have two. 396 | /// 397 | /// # Note 398 | /// The assumption of the return type is that we can always match any 399 | /// device with any config. Let's see how long this assumption will remain valid... 400 | /// 401 | pub fn perf_configs(&self) -> (Vec, Vec>) { 402 | let mut devices = Vec::with_capacity(1); 403 | let mut configs = Vec::with_capacity(2); 404 | 405 | let typ = self.unit(); 406 | 407 | // XXX: Horrible vector transformation: 408 | let matched_devices: Vec = PMU_DEVICES 409 | .iter() 410 | .filter(|d| typ.to_perf_prefix().map_or(false, |t| d.starts_with(t))) 411 | .map(|d| d.clone()) 412 | .collect(); 413 | devices.extend(matched_devices); 414 | 415 | // We can have no devices if we don't understand how to match the unit name to perf names: 416 | if devices.len() == 0 { 417 | debug!( 418 | "Unit {:?} is not available to measure '{}'.", 419 | self.unit(), 420 | self, 421 | ); 422 | } 423 | 424 | for args in self.perf_args() { 425 | configs.push(args); 426 | } 427 | 428 | (devices, configs) 429 | } 430 | 431 | /// Does this event use the passed code? 432 | pub fn uses_event_code(&self, event_code: u8) -> bool { 433 | match self.0.event_code { 434 | Tuple::One(e1) => e1 == event_code, 435 | Tuple::Two(e1, e2) => e1 == event_code || e2 == event_code, 436 | } 437 | } 438 | 439 | /// Does this event use the passed code? 
440 | pub fn uses_umask(&self, umask: u8) -> bool { 441 | match self.0.umask { 442 | Tuple::One(m1) => m1 == umask, 443 | Tuple::Two(m1, m2) => m1 == umask || m2 == umask, 444 | } 445 | } 446 | 447 | /// Is this event an uncore event? 448 | pub fn is_uncore(&self) -> bool { 449 | self.0.unit.is_some() 450 | } 451 | 452 | pub fn unit(&self) -> MonitoringUnit { 453 | self.0 454 | .unit 455 | .map_or(MonitoringUnit::CPU, |u| MonitoringUnit::new(u)) 456 | } 457 | 458 | /// Is this event an offcore event? 459 | pub fn is_offcore(&self) -> bool { 460 | match self.0.event_code { 461 | Tuple::One(_) => { 462 | assert!(!self.0.offcore); 463 | false 464 | } 465 | Tuple::Two(_, _) => { 466 | assert!(self.0.event_name.contains("OFFCORE")); 467 | // The OR is because there is this weird meta-event OFFCORE_RESPONSE 468 | // in the data files. It has offcore == false and is not really a proper event :/ 469 | assert!(self.0.offcore || self.0.event_name == "OFFCORE_RESPONSE"); 470 | true 471 | } 472 | } 473 | } 474 | 475 | /// Get the correct counter mask 476 | pub fn counter(&self) -> Counter { 477 | if *HT_AVAILABLE || self.is_uncore() { 478 | self.0.counter 479 | } else { 480 | self.0.counter_ht_off.expect("A bug in JSON?") // Ideally, all CPU events should have this attribute 481 | } 482 | } 483 | 484 | fn push_arg(configs: &mut Vec>, value: String) { 485 | for config in configs.iter_mut() { 486 | config.push(value.clone()); 487 | } 488 | } 489 | 490 | /// Returns a set of attributes used to build the perf event description. 491 | /// 492 | /// # Arguments 493 | /// * try_alternative: Can give a different event encoding (for offcore events). 494 | fn perf_args(&self) -> Vec> { 495 | // OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1 provide identical functionality. The reason 496 | // that there are two of them is that these events are associated with a separate MSR that is 497 | // used to program the types of requests/responses that you want to count (instead of being 498 | // able to include this information in the Umask field of the PERFEVT_SELx MSR). The 499 | // performance counter event OFFCORE_RESPONSE_0 (Event 0xB7) is associated with MSR 0x1A6, 500 | // while the performance counter event OFFCORE_RESPONSE_1 (Event 0xBB) is associated with MSR 501 | // 0x1A7. 502 | // So having two events (with different associated MSRs) allows you to count two different 503 | // offcore response events at the same time. 
504 | // Source: https://software.intel.com/en-us/forums/software-tuning-performance-optimization-platform-monitoring/topic/559227 505 | 506 | let two_configs: bool = match self.0.event_code { 507 | Tuple::One(_) => false, 508 | Tuple::Two(_, _) => true, 509 | }; 510 | 511 | let mut ret: Vec> = vec![Vec::with_capacity(7)]; 512 | if two_configs { 513 | ret.push(Vec::with_capacity(7)); 514 | } 515 | PerfEvent::push_arg(&mut ret, format!("name={}", self.0.event_name)); 516 | 517 | let is_pcu = self.0.unit.map_or(false, |u| { 518 | return MonitoringUnit::new(u) == MonitoringUnit::PCU; 519 | }); 520 | 521 | match self.0.event_code { 522 | Tuple::One(ev) => { 523 | // PCU events have umasks defined but they're OR'd with event (wtf) 524 | let pcu_umask = if is_pcu { 525 | match self.0.umask { 526 | Tuple::One(mask) => mask, 527 | Tuple::Two(_m1, _m2) => unreachable!(), 528 | } 529 | } else { 530 | 0x0 531 | }; 532 | 533 | ret[0].push(format!("event=0x{:x}", ev | pcu_umask)); 534 | } 535 | Tuple::Two(e1, e2) => { 536 | assert!(two_configs); 537 | assert!(!is_pcu); 538 | ret[0].push(format!("event=0x{:x}", e1)); 539 | ret[1].push(format!("event=0x{:x}", e2)); 540 | } 541 | }; 542 | 543 | if !is_pcu { 544 | match self.0.umask { 545 | Tuple::One(mask) => { 546 | PerfEvent::push_arg(&mut ret, format!("umask=0x{:x}", mask)); 547 | } 548 | Tuple::Two(m1, m2) => { 549 | assert!(two_configs); 550 | ret[0].push(format!("umask=0x{:x}", m1)); 551 | ret[1].push(format!("umask=0x{:x}", m2)); 552 | } 553 | }; 554 | } 555 | 556 | if self.0.counter_mask != 0 { 557 | PerfEvent::push_arg(&mut ret, format!("cmask=0x{:x}", self.0.counter_mask)); 558 | } 559 | 560 | if self.0.fc_mask != 0 { 561 | PerfEvent::push_arg(&mut ret, format!("fc_mask=0x{:x}", self.0.fc_mask)); 562 | } 563 | 564 | if self.0.port_mask != 0 { 565 | PerfEvent::push_arg(&mut ret, format!("ch_mask=0x{:x}", self.0.port_mask)); 566 | } 567 | 568 | if self.0.offcore { 569 | PerfEvent::push_arg(&mut ret, format!("offcore_rsp=0x{:x}", self.0.msr_value)); 570 | } else { 571 | match self.0.msr_index { 572 | MSRIndex::One(0x3F6) => { 573 | PerfEvent::push_arg(&mut ret, format!("ldlat=0x{:x}", self.0.msr_value)); 574 | } 575 | MSRIndex::One(0x1A6) => { 576 | PerfEvent::push_arg(&mut ret, format!("offcore_rsp=0x{:x}", self.0.msr_value)); 577 | } 578 | MSRIndex::One(0x1A7) => { 579 | PerfEvent::push_arg(&mut ret, format!("offcore_rsp=0x{:x}", self.0.msr_value)); 580 | } 581 | MSRIndex::One(0x3F7) => { 582 | PerfEvent::push_arg(&mut ret, format!("frontend=0x{:x}", self.0.msr_value)); 583 | } 584 | MSRIndex::One(a) => { 585 | unreachable!("Unknown MSR value {}, check linux/latest/source/tools/perf/pmu-events/jevents.c", a) 586 | } 587 | MSRIndex::Two(_, _) => { 588 | unreachable!("Should not have non offcore events with two MSR index values.") 589 | } 590 | MSRIndex::None => { 591 | // ignored, not a load latency event 592 | } 593 | }; 594 | } 595 | 596 | if self.0.invert { 597 | PerfEvent::push_arg(&mut ret, String::from("inv=1")); 598 | } 599 | 600 | if self.0.edge_detect { 601 | PerfEvent::push_arg(&mut ret, String::from("edge=1")); 602 | } 603 | 604 | if self.0.any_thread { 605 | PerfEvent::push_arg(&mut ret, String::from("any=1")); 606 | } 607 | 608 | if self.match_filter("CBoFilter0[23:17]") { 609 | PerfEvent::push_arg(&mut ret, String::from("filter_state=0x1f")); 610 | } 611 | 612 | if self.match_filter("CBoFilter1[15:0]") { 613 | // TODO: Include both sockets by default -- we should probably be smarter... 
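                // (filter_nid is a node-id bitmask; 0x3 sets bits 0 and 1, i.e. NUMA nodes 0
                // and 1, which covers both sockets on a typical two-socket machine.)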
614 | PerfEvent::push_arg(&mut ret, String::from("filter_nid=0x3")); 615 | } 616 | 617 | if self.match_filter("CBoFilter1[28:20]") { 618 | // TOR events requires filter_opc 619 | // Set to: 0x192 PrefData Prefetch Data into LLC but don’t pass to L2. Includes Hints 620 | PerfEvent::push_arg(&mut ret, String::from("filter_opc=0x192")); 621 | } 622 | 623 | ret 624 | } 625 | 626 | pub fn perf_qualifiers(&self) -> String { 627 | let qualifiers = String::from("S"); 628 | if self.0.pebs == PebsType::PebsOrRegular { 629 | // Adding 'p' for PebsOrRegular event doesnt seem to work 630 | // for many events in perf that Intel regards as PEBS capable events 631 | // (see issue #2) 632 | } else if self.0.pebs == PebsType::PebsOnly { 633 | // Adding a 'p' here seems counterproducive (perf won't measure the events then) 634 | // so we do nothing 635 | } 636 | qualifiers 637 | } 638 | 639 | fn filters(&self) -> Vec<&str> { 640 | self.0.filter.map_or(Vec::new(), |value| { 641 | value 642 | .split(",") 643 | .map(|x| x.trim()) 644 | .filter(|x| x.len() > 0) 645 | .collect() 646 | }) 647 | } 648 | 649 | pub fn match_filter(&self, filter: &str) -> bool { 650 | self.filters().contains(&filter) 651 | } 652 | } 653 | 654 | impl<'a, 'b> fmt::Display for PerfEvent<'a, 'b> { 655 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 656 | write!(f, "{}", self.0.event_name) 657 | } 658 | } 659 | 660 | /// Adding a new event to a group of existing events (that can be measured 661 | /// together) can fail for a variety of reasons which are encoded in this type. 662 | #[derive(Debug)] 663 | pub enum AddEventError { 664 | /// We couldn't measure any more offcore events 665 | OffcoreCapacityReached, 666 | /// We don't have more counters left on this monitoring unit 667 | UnitCapacityReached(MonitoringUnit), 668 | /// We have a constraint that we can't measure the new event together with 669 | /// an existing event in the group 670 | CounterConstraintConflict, 671 | /// We have a conflict with filters 672 | FilterConstraintConflict, 673 | /// The errata specifies an issue with this event (we tend to isolate these) 674 | ErrataConflict, 675 | /// This counter must be measured alone 676 | TakenAloneConflict, 677 | /// This is one of these events that we manually specified to be isolated 678 | IsolatedEventConflict, 679 | } 680 | 681 | impl fmt::Display for AddEventError { 682 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 683 | match *self { 684 | AddEventError::OffcoreCapacityReached => write!(f, "Offcore event limit reached."), 685 | AddEventError::UnitCapacityReached(u) => { 686 | write!(f, "Unit '{}' capacity for reached.", u) 687 | } 688 | AddEventError::CounterConstraintConflict => write!(f, "Counter constraints conflict."), 689 | AddEventError::FilterConstraintConflict => write!(f, "Filter constraints conflict."), 690 | AddEventError::ErrataConflict => write!(f, "Errata conflict."), 691 | AddEventError::TakenAloneConflict => write!(f, "Group contains a taken alone counter."), 692 | AddEventError::IsolatedEventConflict => write!(f, "Group contains an isolated event."), 693 | } 694 | } 695 | } 696 | 697 | impl error::Error for AddEventError { 698 | fn description(&self) -> &str { 699 | match *self { 700 | AddEventError::OffcoreCapacityReached => "Offcore event limit reached.", 701 | AddEventError::UnitCapacityReached(_) => "Unit capacity reached.", 702 | AddEventError::CounterConstraintConflict => "Counter constraints conflict.", 703 | AddEventError::FilterConstraintConflict => "Filter constraints 
conflict.", 704 | AddEventError::ErrataConflict => "Errata conflict.", 705 | AddEventError::TakenAloneConflict => "Group contains a taken alone counter.", 706 | AddEventError::IsolatedEventConflict => "Group contains an isolated event.", 707 | } 708 | } 709 | } 710 | 711 | #[derive(Debug)] 712 | pub struct PerfEventGroup<'a, 'b> 713 | where 714 | 'b: 'a, 715 | { 716 | events: Vec>, 717 | limits: &'a HashMap, 718 | } 719 | 720 | impl<'a, 'b> PerfEventGroup<'a, 'b> { 721 | /// Make a new performance event group. 722 | pub fn new(unit_sizes: &'a HashMap) -> PerfEventGroup { 723 | PerfEventGroup { 724 | events: Default::default(), 725 | limits: unit_sizes, 726 | } 727 | } 728 | 729 | /// Returns how many offcore events are in the group. 730 | fn offcore_events(&self) -> usize { 731 | self.events.iter().filter(|e| e.is_offcore()).count() 732 | } 733 | 734 | /// Returns how many uncore events are in the group for a given unit. 735 | fn events_by_unit(&self, unit: MonitoringUnit) -> Vec<&PerfEvent> { 736 | self.events.iter().filter(|e| e.unit() == unit).collect() 737 | } 738 | 739 | /// Backtracking algorithm to find assigment of events to available counters 740 | /// while respecting the counter constraints every event has. 741 | /// The events passed here should all have the same counter type 742 | /// (i.e., either all programmable or all fixed) and the same unit. 743 | /// 744 | /// Returns a possible placement or None if no assignment was possible. 745 | fn find_counter_assignment( 746 | level: usize, 747 | max_level: usize, 748 | events: Vec<&'a PerfEvent<'a, 'b>>, 749 | assignment: Vec<&'a PerfEvent<'a, 'b>>, 750 | ) -> Option>> { 751 | // Are we done yet? 752 | if events.len() == 0 { 753 | return Some(assignment); 754 | } 755 | // Are we too deep? 
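// Aside: the same backtracking search, reduced to plain bitmasks, as a
// self-contained sketch (independent of `PerfEvent`; counter `level`
// corresponds to bit `level` of each event's allowed-counter mask). The
// actual depth check continues right below this block.
//
//     fn feasible(masks: &[usize], level: usize, max: usize) -> bool {
//         if masks.is_empty() { return true; }  // every event placed
//         if level >= max { return false; }     // ran out of counters
//         for (i, &m) in masks.iter().enumerate() {
//             if (m & (1 << level)) != 0 {
//                 // Try placing event `i` on this counter and recurse.
//                 let mut rest = masks.to_vec();
//                 rest.remove(i);
//                 if feasible(&rest, level + 1, max) { return true; }
//             }
//         }
//         // Leave this counter empty and move on.
//         feasible(masks, level + 1, max)
//     }
//
// For example, `feasible(&[0b0001, 0b0010], 0, 4)` succeeds, while
// `feasible(&[0b0001, 0b0001], 0, 4)` fails because both events insist on
// counter 0.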
756 | if level >= max_level { 757 | return None; 758 | } 759 | 760 | for (idx, event) in events.iter().enumerate() { 761 | let mask: usize = match event.counter() { 762 | Counter::Programmable(mask) => mask as usize, 763 | Counter::Fixed(mask) => mask as usize, 764 | }; 765 | 766 | let mut assignment = assignment.clone(); 767 | let mut events = events.clone(); 768 | 769 | // If event supports counter, let's assign it to this counter and go deeper 770 | if (mask & (1 << level)) > 0 { 771 | assignment.push(event); 772 | events.remove(idx); 773 | let ret = PerfEventGroup::find_counter_assignment( 774 | level + 1, 775 | max_level, 776 | events, 777 | assignment, 778 | ); 779 | if ret.is_some() { 780 | return ret; 781 | } 782 | } 783 | // Otherwise let's not assign the event at this level and go deeper (for groups that 784 | // don't use all counters) 785 | else { 786 | let ret = PerfEventGroup::find_counter_assignment( 787 | level + 1, 788 | max_level, 789 | events, 790 | assignment, 791 | ); 792 | if ret.is_some() { 793 | return ret; 794 | } 795 | } 796 | // And finally, just try with the next event in the list 797 | } 798 | 799 | None 800 | } 801 | 802 | /// Check if this event conflicts with the counter requirements 803 | /// of events already in this group 804 | fn has_counter_constraint_conflicts(&self, new_event: &PerfEvent) -> bool { 805 | let unit = new_event.unit(); 806 | let unit_limit = *self.limits.get(&unit).unwrap_or(&0); 807 | //error!("unit = {:?} unit_limit {:?}", unit, unit_limit); 808 | 809 | // Get all the events that share the same counters as new_event: 810 | let mut events: Vec<&PerfEvent> = self 811 | .events_by_unit(unit) 812 | .into_iter() 813 | .filter(|c| match (c.counter(), new_event.counter()) { 814 | (Counter::Programmable(_), Counter::Programmable(_)) => true, 815 | (Counter::Fixed(_), Counter::Fixed(_)) => true, 816 | _ => false, 817 | }) 818 | .collect(); 819 | 820 | events.push(new_event); 821 | PerfEventGroup::find_counter_assignment(0, unit_limit, events, Vec::new()).is_none() 822 | } 823 | 824 | /// Check if this events conflicts with the filter requirements of 825 | /// events already in this group 826 | fn has_filter_constraint_conflicts(&self, new_event: &PerfEvent) -> bool { 827 | let unit = new_event.unit(); 828 | let events: Vec<&PerfEvent> = self.events_by_unit(unit); 829 | 830 | for event in events.iter() { 831 | for filter in event.filters() { 832 | if new_event.filters().contains(&filter) { 833 | return true; 834 | } 835 | } 836 | } 837 | 838 | false 839 | } 840 | 841 | /// Try to add an event to an event group. 842 | /// 843 | /// Returns true if the event can be added to the group, false if we would be Unable 844 | /// to measure the event in the same group (given the PMU limitations). 845 | /// 846 | /// Things we consider correctly right now: 847 | /// * Fixed amount of counters per monitoring unit (so we don't multiplex). 848 | /// * Some events can only use some counters. 849 | /// * Taken alone attribute of the events. 850 | /// 851 | /// Things we consider not entirely correct right now: 852 | /// * Event Erratas this is not complete in the JSON files, and we just run them in isolation 853 | /// 854 | pub fn add_event(&mut self, event: PerfEvent<'a, 'b>) -> Result<(), AddEventError> { 855 | // 1. Can't measure more than two offcore events: 856 | if event.is_offcore() && self.offcore_events() == 2 { 857 | return Err(AddEventError::OffcoreCapacityReached); 858 | } 859 | 860 | // 2. 
Check we don't measure more events than we have counters 861 | // for on the repspective units 862 | let unit = event.unit(); 863 | let unit_limit = *self.limits.get(&unit).unwrap_or(&0); 864 | if self.events_by_unit(unit).len() >= unit_limit { 865 | return Err(AddEventError::UnitCapacityReached(unit)); 866 | } 867 | 868 | // 3. Now, consider the counter <-> event mapping constraints: 869 | // Try to see if there is any event already in the group 870 | // that would conflict when running together with the new `event`: 871 | if self.has_counter_constraint_conflicts(&event) { 872 | return Err(AddEventError::CounterConstraintConflict); 873 | } 874 | 875 | if self.has_filter_constraint_conflicts(&event) { 876 | return Err(AddEventError::FilterConstraintConflict); 877 | } 878 | 879 | // 4. Isolate things that have erratas to not screw other events (see HSW30) 880 | let errata = self.events.iter().any(|cur| cur.0.errata.is_some()); 881 | if errata || event.0.errata.is_some() && self.events.len() != 0 { 882 | return Err(AddEventError::ErrataConflict); 883 | } 884 | 885 | // 5. If an event has the taken alone attribute set it needs to be measured alone 886 | let already_have_taken_alone_event = self.events.iter().any(|cur| cur.0.taken_alone); 887 | if already_have_taken_alone_event || event.0.taken_alone && self.events.len() != 0 { 888 | return Err(AddEventError::TakenAloneConflict); 889 | } 890 | 891 | // 6. If our own isolate event list contains the name we also run them alone: 892 | let already_have_isolated_event = self.events.get(0).map_or(false, |e| { 893 | ISOLATE_EVENTS.iter().any(|cur| *cur == e.0.event_name) 894 | }); 895 | if already_have_isolated_event 896 | || ISOLATE_EVENTS.iter().any(|cur| *cur == event.0.event_name) && self.events.len() != 0 897 | { 898 | return Err(AddEventError::IsolatedEventConflict); 899 | } 900 | 901 | self.events.push(event); 902 | Ok(()) 903 | } 904 | 905 | /// Find the right config to use for every event in the group. 906 | /// 907 | /// * We need to make sure we use the correct config if we have two offcore events in the same group. 908 | pub fn get_perf_config(&self) -> Vec { 909 | let mut event_strings: Vec = Vec::with_capacity(2); 910 | let mut have_one_offcore = false; // Have we already added one offcore event? 
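// For reference, every entry pushed below follows perf's
// `<pmu>/<term>,<term>,.../<qualifiers>` event syntax; with hypothetical
// encodings, a core event and an uncore event would look roughly like:
//
//     cpu/name=INST_RETIRED.ANY_P,event=0xc0,umask=0x0/S
//     uncore_imc_0/name=uncore_imc_0.UNC_M_CAS_COUNT.RD,event=0x4,umask=0x3/S
//
// The first offcore event added to a group gets `configs[0]` and a second one
// gets `configs[1]` (the alternate encoding described in `perf_args` above),
// which is exactly what `have_one_offcore` keeps track of.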
911 | 912 | for event in self.events.iter() { 913 | let (devices, mut configs) = event.perf_configs(); 914 | 915 | if devices.len() == 0 || configs.len() == 0 { 916 | error!( 917 | "Event {} supported by hardware, but your Linux does not allow you to measure it (available PMU devices = {:?})", 918 | event, devices 919 | ); 920 | 921 | continue; 922 | } 923 | 924 | // TODO: handle fixed counters 925 | // fixed_counters = { 926 | // "inst_retired.any": (0xc0, 0, 0), 927 | // "cpu_clk_unhalted.thread": (0x3c, 0, 0), 928 | // "cpu_clk_unhalted.thread_any": (0x3c, 0, 1), 929 | // } 930 | 931 | // Adding offcore event: 932 | if event.is_offcore() { 933 | assert!(devices.len() == 1); 934 | assert!(configs.len() == 2); 935 | assert!(devices[0] == "cpu"); 936 | 937 | let config = match have_one_offcore { 938 | false => configs.get(0).unwrap(), // Ok, always has at least one config 939 | true => configs.get(1).unwrap(), // Ok, as offcore implies two configs 940 | }; 941 | 942 | event_strings.push(format!( 943 | "{}/{}/{}", 944 | devices[0], 945 | config.join(","), 946 | event.perf_qualifiers() 947 | )); 948 | have_one_offcore = true; 949 | } 950 | // Adding uncore event: 951 | else if event.is_uncore() { 952 | assert!(configs.len() == 1); 953 | 954 | // If we have an uncore event we just go ahead and measure it on all possible devices: 955 | for device in devices { 956 | // Patch name in config so we know where this event was running 957 | // `perf stat` just reports CPU 0 for uncore events :-( 958 | configs[0][0] = format!("name={}.{}", device, event.0.event_name); 959 | event_strings.push(format!( 960 | "{}/{}/{}", 961 | device, 962 | configs[0].join(","), 963 | event.perf_qualifiers() 964 | )); 965 | } 966 | } 967 | // Adding normal event: 968 | else { 969 | assert!(devices.len() == 1); 970 | assert!(configs.len() == 1); 971 | assert!(devices[0] == "cpu"); 972 | 973 | event_strings.push(format!( 974 | "{}/{}/{}", 975 | devices[0], 976 | configs[0].join(","), 977 | event.perf_qualifiers() 978 | )); 979 | } 980 | } 981 | 982 | event_strings 983 | } 984 | 985 | /// Returns a list of events as strings that can be passed to perf-record using 986 | /// the -e arguments. 987 | pub fn get_perf_config_strings(&self) -> Vec { 988 | self.get_perf_config() 989 | } 990 | 991 | /// Returns a list of event names in this group. 992 | /// 993 | /// The order of the list of names matches with the order 994 | /// returned by `get_perf_config_strings` or `get_perf_config`. 995 | pub fn get_event_names(&self) -> Vec<&'b str> { 996 | self.events.iter().map(|event| event.0.event_name).collect() 997 | } 998 | } 999 | 1000 | /// Given a list of events, create a list of event groups that can be measured together. 
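///
/// A typical call chain, mirroring how `profile()` below drives it (sketch
/// only, error handling omitted):
///
/// ```ignore
/// let groups = schedule_events(get_known_events());
/// for group in &groups {
///     let names = group.get_event_names();
///     let args = group.get_perf_config_strings(); // one `-e` argument each
///     println!("group of {} events: {:?}", names.len(), args);
/// }
/// ```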
1001 | pub fn schedule_events<'a, 'b>(events: Vec<&'a EventDescription<'b>>) -> Vec> 1002 | where 1003 | 'b: 'a, 1004 | { 1005 | let mut groups: Vec = Vec::with_capacity(42); 1006 | 1007 | for event in events { 1008 | if IGNORE_EVENTS.contains_key(event.event_name) { 1009 | continue; 1010 | } 1011 | 1012 | let perf_event: PerfEvent = PerfEvent(event); 1013 | let mut added: Result<(), AddEventError> = Err(AddEventError::ErrataConflict); 1014 | match perf_event.unit() { 1015 | MonitoringUnit::Unknown => { 1016 | info!("Ignoring event with unknown unit '{}'", event); 1017 | continue; 1018 | } 1019 | _ => (), 1020 | }; 1021 | 1022 | // Try to add the event to an existing group: 1023 | for group in groups.iter_mut() { 1024 | let perf_event: PerfEvent = PerfEvent(event); 1025 | added = group.add_event(perf_event); 1026 | if added.is_ok() { 1027 | break; 1028 | } 1029 | } 1030 | 1031 | // Unable to add event to any existing group, make a new group instead: 1032 | if !added.is_ok() { 1033 | let mut pg = PerfEventGroup::new(&*PMU_COUNTERS); 1034 | let perf_event: PerfEvent = PerfEvent(event); 1035 | 1036 | let added = pg.add_event(perf_event); 1037 | match added { 1038 | Err(e) => { 1039 | let perf_event: PerfEvent = PerfEvent(event); 1040 | panic!( 1041 | "Can't add a new event {:?} to an empty group: {:?}", 1042 | perf_event, e 1043 | ); 1044 | } 1045 | Ok(_) => (), 1046 | }; 1047 | 1048 | groups.push(pg); 1049 | } 1050 | } 1051 | 1052 | // println!("{:?}", groups); 1053 | groups 1054 | } 1055 | 1056 | pub fn get_perf_command( 1057 | cmd_working_dir: &str, 1058 | _output_path: &Path, 1059 | env: &Vec<(String, String)>, 1060 | breakpoints: &Vec, 1061 | record: bool, 1062 | ) -> Command { 1063 | let mut perf = Command::new("perf"); 1064 | perf.current_dir(cmd_working_dir); 1065 | let _filename: String; 1066 | if !record { 1067 | perf.arg("stat"); 1068 | perf.arg("-aA"); 1069 | perf.arg("-I 250"); 1070 | perf.arg("-x ;"); 1071 | } else { 1072 | perf.arg("record"); 1073 | perf.arg("--group"); 1074 | perf.arg("-F 4"); 1075 | perf.arg("-a"); 1076 | perf.arg("--raw-samples"); 1077 | } 1078 | 1079 | // Ensure we use dots as number separators in csv output (see issue #1): 1080 | perf.env("LC_NUMERIC", "C"); 1081 | 1082 | // Add the environment variables: 1083 | for &(ref key, ref value) in env.iter() { 1084 | perf.env(key, value); 1085 | } 1086 | let breakpoint_args: Vec = breakpoints.iter().map(|s| format!("-e \\{}", s)).collect(); 1087 | perf.args(breakpoint_args.as_slice()); 1088 | 1089 | perf 1090 | } 1091 | 1092 | pub fn profile<'a, 'b>( 1093 | output_path: &Path, 1094 | cmd_working_dir: &str, 1095 | cmd: Vec, 1096 | env: Vec<(String, String)>, 1097 | breakpoints: Vec, 1098 | record: bool, 1099 | events: Option>>, 1100 | dryrun: bool, 1101 | ) where 1102 | 'b: 'a, 1103 | { 1104 | let event_groups = match events { 1105 | Some(evts) => schedule_events(evts), 1106 | None => schedule_events(get_known_events()), 1107 | }; 1108 | 1109 | // Is this run already done (in case we restart): 1110 | let mut completed_file: PathBuf = output_path.to_path_buf(); 1111 | completed_file.push("completed"); 1112 | if completed_file.exists() { 1113 | warn!( 1114 | "Run {} already completed, skipping.", 1115 | output_path.to_string_lossy() 1116 | ); 1117 | return; 1118 | } 1119 | 1120 | create_out_directory(output_path); 1121 | if !dryrun { 1122 | check_for_perf(); 1123 | let ret = check_for_perf_permissions() 1124 | || check_for_disabled_nmi_watchdog() 1125 | || check_for_perf_paranoia(); 1126 | if !ret { 1127 | 
std::process::exit(3); 1128 | } 1129 | 1130 | let _ = save_numa_topology(&output_path).expect("Can't save NUMA topology"); 1131 | let _ = save_cpu_topology(&output_path).expect("Can't save CPU topology"); 1132 | let _ = save_lstopo(&output_path).expect("Can't save lstopo information"); 1133 | let _ = save_cpuid(&output_path).expect("Can't save CPUID information"); 1134 | let _ = save_likwid_topology(&output_path).expect("Can't save likwid information"); 1135 | } 1136 | 1137 | assert!(cmd.len() >= 1); 1138 | let mut perf_log = PathBuf::new(); 1139 | perf_log.push(output_path); 1140 | perf_log.push("perf.csv"); 1141 | 1142 | let mut wtr = csv::Writer::from_file(perf_log).unwrap(); 1143 | let r = wtr.encode(( 1144 | "command", 1145 | "event_names", 1146 | "perf_events", 1147 | "breakpoints", 1148 | "datafile", 1149 | "perf_command", 1150 | "stdout", 1151 | "stdin", 1152 | )); 1153 | assert!(r.is_ok()); 1154 | 1155 | // For warm-up do a dummy run of the program with perf 1156 | let record_path = Path::new("/dev/null"); 1157 | let mut perf = get_perf_command(cmd_working_dir, output_path, &env, &breakpoints, record); 1158 | perf.arg("-n"); // null run - don’t start any counters 1159 | let (_, _, _) = execute_perf(&mut perf, &cmd, &Vec::new(), &record_path, dryrun); 1160 | debug!("Warmup complete, let's start measuring."); 1161 | 1162 | let mut pb = ProgressBar::new(event_groups.len() as u64); 1163 | 1164 | for (idx, group) in event_groups.iter().enumerate() { 1165 | if !dryrun { 1166 | pb.inc(); 1167 | } 1168 | 1169 | let event_names: Vec<&str> = group.get_event_names(); 1170 | let counters: Vec = group.get_perf_config_strings(); 1171 | 1172 | let mut record_path = PathBuf::new(); 1173 | let filename = match record { 1174 | false => format!("{}_stat.csv", idx + 1), 1175 | true => format!("{}_perf.data", idx + 1), 1176 | }; 1177 | record_path.push(output_path); 1178 | record_path.push(&filename); 1179 | 1180 | let mut perf = get_perf_command(cmd_working_dir, output_path, &env, &breakpoints, record); 1181 | let (executed_cmd, stdout, stdin) = 1182 | execute_perf(&mut perf, &cmd, &counters, record_path.as_path(), dryrun); 1183 | if !dryrun { 1184 | let r = wtr.encode(vec![ 1185 | cmd.join(" "), 1186 | event_names.join(","), 1187 | counters.join(","), 1188 | String::new(), 1189 | filename, 1190 | executed_cmd, 1191 | stdout, 1192 | stdin, 1193 | ]); 1194 | assert!(r.is_ok()); 1195 | 1196 | let r = wtr.flush(); 1197 | assert!(r.is_ok()); 1198 | } 1199 | } 1200 | 1201 | // Mark this run as completed: 1202 | let _ = File::create(completed_file.as_path()).unwrap(); 1203 | } 1204 | 1205 | pub fn check_for_perf() { 1206 | match Command::new("perf").output() { 1207 | Ok(out) => { 1208 | if out.status.code() != Some(1) { 1209 | error!("'perf' seems to have some problems?"); 1210 | debug!("perf exit status was: {}", out.status); 1211 | error!("{}", String::from_utf8_lossy(&out.stderr)); 1212 | error!( 1213 | "You may require a restart after fixing this so \ 1214 | `/sys/bus/event_source/devices` is updated!" 1215 | ); 1216 | std::process::exit(2); 1217 | } 1218 | } 1219 | Err(_) => { 1220 | error!( 1221 | "'perf' does not seem to be executable? You may need to install it (Ubuntu: \ 1222 | `sudo apt-get install linux-tools-common`)." 1223 | ); 1224 | error!( 1225 | "You may require a restart after fixing this so \ 1226 | `/sys/bus/event_source/devices` is updated!" 
1227 | ); 1228 | std::process::exit(2); 1229 | } 1230 | } 1231 | } 1232 | 1233 | pub fn check_for_perf_permissions() -> bool { 1234 | let path = Path::new("/proc/sys/kernel/kptr_restrict"); 1235 | let mut file = File::open(path).expect("kptr_restrict file does not exist?"); 1236 | let mut s = String::new(); 1237 | 1238 | match file.read_to_string(&mut s) { 1239 | Ok(_) => { 1240 | match s.trim() { 1241 | "1" => { 1242 | error!( 1243 | "kptr restriction is enabled. You can either run autoperf as root or \ 1244 | do:" 1245 | ); 1246 | error!("\tsudo sh -c 'echo 0 >> {}'", path.display()); 1247 | error!("to disable."); 1248 | return false; 1249 | } 1250 | "0" => { 1251 | // debug!("kptr_restrict is already disabled (good)."); 1252 | } 1253 | _ => { 1254 | warn!( 1255 | "Unkown content read from '{}': {}. Proceeding anyways...", 1256 | path.display(), 1257 | s.trim() 1258 | ); 1259 | } 1260 | } 1261 | } 1262 | 1263 | Err(why) => { 1264 | error!("Couldn't read {}: {}", path.display(), why.description()); 1265 | std::process::exit(3); 1266 | } 1267 | } 1268 | 1269 | true 1270 | } 1271 | 1272 | pub fn check_for_disabled_nmi_watchdog() -> bool { 1273 | let path = Path::new("/proc/sys/kernel/nmi_watchdog"); 1274 | let mut file = File::open(path).expect("nmi_watchdog file does not exist?"); 1275 | let mut s = String::new(); 1276 | 1277 | match file.read_to_string(&mut s) { 1278 | Ok(_) => { 1279 | match s.trim() { 1280 | "1" => { 1281 | error!( 1282 | "nmi_watchdog is enabled. This can lead to counters not read (). Execute" 1284 | ); 1285 | error!("\tsudo sh -c 'echo 0 > {}'", path.display()); 1286 | error!("to disable."); 1287 | return false; 1288 | } 1289 | "0" => { 1290 | // debug!("nmi_watchdog is already disabled (good)."); 1291 | } 1292 | _ => { 1293 | warn!( 1294 | "Unkown content read from '{}': {}. Proceeding anyways...", 1295 | path.display(), 1296 | s.trim() 1297 | ); 1298 | } 1299 | } 1300 | } 1301 | 1302 | Err(why) => { 1303 | error!("Couldn't read {}: {}", path.display(), why.description()); 1304 | std::process::exit(4); 1305 | } 1306 | } 1307 | 1308 | true 1309 | } 1310 | 1311 | pub fn check_for_perf_paranoia() -> bool { 1312 | let path = Path::new("/proc/sys/kernel/perf_event_paranoid"); 1313 | let mut file = File::open(path).expect("perf_event_paranoid file does not exist?"); 1314 | let mut s = String::new(); 1315 | 1316 | let res = match file.read_to_string(&mut s) { 1317 | Ok(_) => { 1318 | let digit = i64::from_str(s.trim()).unwrap_or_else(|_op| { 1319 | warn!( 1320 | "Unkown content read from '{}': {}. Proceeding anyways...", 1321 | path.display(), 1322 | s.trim() 1323 | ); 1324 | 1 1325 | }); 1326 | 1327 | if digit >= 0 { 1328 | error!( 1329 | "perf_event_paranoid is enabled. This means we can't collect system wide \ 1330 | stats. 
Execute" 1331 | ); 1332 | error!("\tsudo sh -c 'echo -1 > {}'", path.display()); 1333 | error!("to disable."); 1334 | false 1335 | } else { 1336 | true 1337 | } 1338 | } 1339 | 1340 | Err(why) => { 1341 | error!("Couldn't read {}: {}", path.display(), why.description()); 1342 | std::process::exit(4); 1343 | } 1344 | }; 1345 | 1346 | res 1347 | } 1348 | -------------------------------------------------------------------------------- /src/scale.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | #[allow(unused)] 4 | pub fn scale(_manifest_folder: &Path, _dryrun: bool, _start: usize, _stepping: usize) { 5 | // let canonical_manifest_path = fs::canonicalize(&manifest_folder) 6 | // .expect("canonicalize manifest path does not work"); 7 | // 8 | // let mut out_dir = canonical_manifest_path.to_path_buf(); 9 | // let hostname = get_hostname().unwrap_or(String::from("unknown")); 10 | // out_dir.push(hostname); 11 | // mkdir(&out_dir); 12 | // 13 | // let mt = MachineTopology::new(); 14 | // 15 | // let mut manifest: PathBuf = canonical_manifest_path.to_path_buf(); 16 | // manifest.push("manifest.toml"); 17 | // let mut file = File::open(manifest.as_path()).expect("manifest.toml file does not exist?"); 18 | // let mut manifest_string = String::new(); 19 | // let _ = file.read_to_string(&mut manifest_string).unwrap(); 20 | // let mut parser = toml::Parser::new(manifest_string.as_str()); 21 | // let doc = match parser.parse() { 22 | // Some(doc) => doc, 23 | // None => { 24 | // error!("Can't parse the manifest file:\n{:?}", parser.errors); 25 | // process::exit(1); 26 | // } 27 | // }; 28 | // let experiment: &toml::Table = doc["experiment"] 29 | // .as_table() 30 | // .expect("Error in manifest.toml: 'experiment' should be a table."); 31 | // let configuration: &[toml::Value] = experiment["configurations"] 32 | // .as_slice() 33 | // .expect("Error in manifest.toml: 'configuration' attribute should be a list."); 34 | // let configs: Vec = configuration.iter() 35 | // .map(|s| s.as_str().expect("configuration elements should be strings").to_string()) 36 | // .collect(); 37 | // let run_alone: bool = experiment.get("alone") 38 | // .map_or(true, |v| v.as_bool().expect("'alone' should be boolean")); 39 | // let profile_only: Option> = experiment.get("profile_only_a") 40 | // .map(|progs| { 41 | // progs.as_slice() 42 | // .expect("Error in manifest.toml: 'profile_only_a' should be a list.") 43 | // .into_iter() 44 | // .map(|p| { 45 | // p.as_str() 46 | // .expect("profile_only_a elements should name programs (strings)") 47 | // .to_string() 48 | // }) 49 | // .collect() 50 | // }); 51 | // let profile_only_b: Option> = experiment.get("profile_only_b") 52 | // .map(|progs| { 53 | // progs.as_slice() 54 | // .expect("Error in manifest.toml: 'profile_only_b' should be a list.") 55 | // .into_iter() 56 | // .map(|p| { 57 | // p.as_str() 58 | // .expect("profile_only_b elements should name programs (strings)") 59 | // .to_string() 60 | // }) 61 | // .collect() 62 | // }); 63 | // 64 | // 65 | // let mut programs: Vec = Vec::with_capacity(2); 66 | // for (key, value) in &doc { 67 | // if key.starts_with("program") { 68 | // let program_desc: &toml::Table = 69 | // doc[key].as_table().expect("Error in manifest.toml: 'program' should be a table."); 70 | // programs.push(Program::from_toml(&canonical_manifest_path, program_desc, run_alone)); 71 | // } 72 | // } 73 | // 74 | // let mut deployments: Vec = Vec::with_capacity(4); 75 | // for config 
in configs { 76 | // match config.as_str() { 77 | // "L1-SMT" => { 78 | // deployments.push(Deployment::split_interleaved("L1-SMT", 79 | // mt.same_l1(), 80 | // mt.l1_size().unwrap_or(0))) 81 | // } 82 | // "L3-SMT" => { 83 | // deployments.push(Deployment::split_interleaved("L3-SMT", 84 | // mt.same_l3(), 85 | // mt.l3_size().unwrap_or(0))) 86 | // } 87 | // "L3-SMT-cores" => { 88 | // deployments.push(Deployment::split_smt_aware("L3-SMT-cores", 89 | // mt.same_l3(), 90 | // mt.l3_size().unwrap_or(0))) 91 | // } 92 | // "L3-cores" => { 93 | // deployments.push(Deployment::split_smt_aware("L3-cores", 94 | // mt.same_l3_cores(), 95 | // mt.l3_size().unwrap_or(0))) 96 | // } 97 | // "Full-L3" => { 98 | // deployments.push(Deployment::split_l3_aware("Full-L3", 99 | // mt.whole_machine_cores(), 100 | // mt.l3_size().unwrap_or(0))) 101 | // } 102 | // "Full-SMT-L3" => { 103 | // deployments.push(Deployment::split_l3_aware("Full-SMT-L3", 104 | // mt.whole_machine(), 105 | // mt.l3_size().unwrap_or(0))) 106 | // } 107 | // "Full-cores" => { 108 | // deployments.push(Deployment::split_interleaved("Full-cores", 109 | // mt.whole_machine_cores(), 110 | // mt.l3_size().unwrap_or(0))) 111 | // } 112 | // "Full-SMT-cores" => { 113 | // deployments.push(Deployment::split_smt_aware("Full-SMT-cores", 114 | // mt.whole_machine(), 115 | // mt.l3_size().unwrap_or(0))) 116 | // } 117 | // 118 | // _ => error!("Ignored unknown deployment config '{}'.", config), 119 | // }; 120 | // } 121 | // 122 | // Add all possible pairs: 123 | // let mut pairs: Vec<(&Program, Option<&Program>)> = Vec::new(); 124 | // for p in programs.iter() { 125 | // pairs.push((p, None)); 126 | // } 127 | // for (a, b) in iproduct!(programs.iter(), programs.iter()) { 128 | // pairs.push((a, Some(b))); 129 | // } 130 | // 131 | // Filter out the pairs we do not want to execute: 132 | // let mut runs: Vec = Vec::new(); 133 | // for (a, b) in pairs.into_iter() { 134 | // let profile_a = profile_only.as_ref().map_or(true, |ps| ps.contains(&a.name)); 135 | // let profile_b = !b.is_none() && 136 | // profile_only_b.as_ref() 137 | // .map_or(profile_a, |ps| ps.contains(&b.unwrap().name)); 138 | // if !profile_a && !profile_b { 139 | // continue; 140 | // } 141 | // 142 | // for d in deployments.iter() { 143 | // if b.is_none() && (!run_alone || !a.alone) { 144 | // continue; 145 | // } 146 | // runs.push(Run::new(&canonical_manifest_path, out_dir.as_path(), a, b, d)); 147 | // } 148 | // } 149 | // 150 | // Finally, profile the runs we are supposed to execute based on the command line args 151 | // let mut i = 0; 152 | // for run in runs.iter_mut().skip(start).step(stepping) { 153 | // if !dryrun { 154 | // run.profile(); 155 | // } else { 156 | // println!("{}", run); 157 | // } 158 | // i += 1; 159 | // } 160 | // 161 | // println!("{} runs completed.", i); 162 | // 163 | } 164 | -------------------------------------------------------------------------------- /src/search.rs: -------------------------------------------------------------------------------- 1 | use std; 2 | 3 | use std::collections::BTreeSet; 4 | use std::collections::HashMap; 5 | use std::path::Path; 6 | use std::path::PathBuf; 7 | use std::process::Command; 8 | 9 | use csv; 10 | 11 | use super::profile; 12 | use super::profile::{MonitoringUnit, PerfEvent}; 13 | use log::*; 14 | use x86::perfcnt::intel::{Counter, EventDescription, MSRIndex, PebsType, Tuple}; 15 | 16 | pub fn event_is_documented( 17 | events: &Vec, 18 | unit: MonitoringUnit, 19 | code: u8, 20 | umask: u8, 21 | 
) -> bool { 22 | for event in events.iter() { 23 | if event.unit() == unit && event.uses_event_code(code) && event.uses_umask(umask) { 24 | return true; 25 | } 26 | } 27 | 28 | return false; 29 | } 30 | 31 | fn execute_perf( 32 | perf: &mut Command, 33 | cmd: &Vec, 34 | counters: &Vec, 35 | ) -> BTreeSet<(String, String)> { 36 | assert!(cmd.len() >= 1); 37 | let events: Vec = counters.iter().map(|c| format!("-e {}", c)).collect(); 38 | 39 | let perf = perf.args(events.as_slice()); 40 | let perf = perf.args(cmd.as_slice()); 41 | let perf_cmd_str: String = format!("{:?}", perf).replace("\"", ""); 42 | 43 | let (_stdout, stderr) = match perf.output() { 44 | Ok(out) => { 45 | let stdout = 46 | String::from_utf8(out.stdout).unwrap_or(String::from("Unable to read stdout!")); 47 | let stderr = 48 | String::from_utf8(out.stderr).unwrap_or(String::from("Unable to read stderr!")); 49 | 50 | if out.status.success() { 51 | // debug!("stdout:\n{:?}", stdout); 52 | // debug!("stderr:\n{:?}", stderr); 53 | } else if !out.status.success() { 54 | error!( 55 | "perf command: {} got unknown exit status was: {}", 56 | perf_cmd_str, out.status 57 | ); 58 | debug!("stdout:\n{}", stdout); 59 | debug!("stderr:\n{}", stderr); 60 | } 61 | 62 | (stdout, stderr) 63 | } 64 | Err(err) => { 65 | error!("Executing {} failed : {}", perf_cmd_str, err); 66 | (String::new(), String::new()) 67 | } 68 | }; 69 | 70 | let mut found_events = BTreeSet::new(); 71 | let mut rdr = csv::Reader::from_string(stderr) 72 | .has_headers(false) 73 | .delimiter(b';') 74 | .flexible(true); 75 | for record in rdr.decode() { 76 | if record.is_ok() { 77 | type SourceRow = (f64, String, String, String, String, String, f64); 78 | let (_time, _cpu, value_string, _, event, _, _percent): SourceRow = 79 | record.expect("Should not happen (in is_ok() branch)!"); 80 | 81 | // Perf will just report first CPU on the socket for uncore events, 82 | // so we temporarily encode the location in the event name and 83 | // extract it here again: 84 | let (unit, event_name) = if !event.starts_with("uncore_") { 85 | // Normal case, we just take the regular event and cpu fields from perf stat 86 | (String::from("cpu"), String::from(event.trim())) 87 | } else { 88 | // Uncore events, use first part of the event name as the location 89 | let (unit, name) = event.split_at(event.find(".").unwrap()); 90 | // remove the _1 in uncore_cbox_1: 91 | let mut unit_parts: Vec<&str> = unit.split('_').collect(); 92 | unit_parts.pop(); 93 | ( 94 | String::from(unit_parts.join("_")), 95 | String::from(name.trim_start_matches(".").trim()), 96 | ) 97 | }; 98 | 99 | let value: u64 = value_string.trim().parse().unwrap_or(0); 100 | if value != 0 { 101 | debug!("{:?} {:?} {:?}", unit, event_name, value); 102 | found_events.insert((event_name, unit)); 103 | } 104 | } 105 | } 106 | 107 | found_events 108 | } 109 | 110 | pub fn check_events<'a, 'b>( 111 | output_path: &Path, 112 | cmd_working_dir: &str, 113 | cmd: Vec, 114 | env: Vec<(String, String)>, 115 | breakpoints: Vec, 116 | record: bool, 117 | events: Vec<&'a EventDescription<'b>>, 118 | ) -> BTreeSet<(String, String)> 119 | where 120 | 'b: 'a, 121 | { 122 | let event_groups = profile::schedule_events(events); 123 | profile::create_out_directory(output_path); 124 | 125 | profile::check_for_perf(); 126 | let ret = profile::check_for_perf_permissions() 127 | || profile::check_for_disabled_nmi_watchdog() 128 | || profile::check_for_perf_paranoia(); 129 | if !ret { 130 | std::process::exit(3); 131 | } 132 | 133 | assert!(cmd.len() >= 
1); 134 | let mut perf_log = PathBuf::new(); 135 | perf_log.push(output_path); 136 | perf_log.push("unknown_events.csv"); 137 | 138 | let mut all_events = BTreeSet::new(); 139 | for group in event_groups { 140 | let mut _event_names: Vec<&str> = group.get_event_names(); 141 | let counters: Vec = group.get_perf_config_strings(); 142 | let mut perf = 143 | profile::get_perf_command(cmd_working_dir, output_path, &env, &breakpoints, record); 144 | let mut found_events = execute_perf(&mut perf, &cmd, &counters); 145 | all_events.append(&mut found_events); 146 | } 147 | 148 | all_events 149 | } 150 | 151 | pub fn print_unknown_events() { 152 | let events = profile::get_known_events(); 153 | let pevents: Vec = events.into_iter().map(|e| PerfEvent(e)).collect(); 154 | let units = vec![ 155 | MonitoringUnit::CPU, 156 | //MonitoringUnit::UBox, 157 | MonitoringUnit::CBox, 158 | MonitoringUnit::HA, 159 | MonitoringUnit::IMC, 160 | //MonitoringUnit::PCU, 161 | //MonitoringUnit::R2PCIe, 162 | MonitoringUnit::R3QPI, 163 | //MonitoringUnit::QPI 164 | ]; 165 | 166 | let mut event_names = HashMap::new(); 167 | for unit in units.iter() { 168 | for code in 1..255 { 169 | for umask in 1..255 { 170 | let id: isize = (*unit as isize) << 32 | (code as isize) << 8 | umask as isize; 171 | let value = format!( 172 | "{}_EVENT_{}_{}", 173 | unit.to_intel_event_description().unwrap_or("CPU"), 174 | code, 175 | umask 176 | ); 177 | event_names.insert(id, value); 178 | } 179 | } 180 | } 181 | 182 | println!("Find events..."); 183 | let mut storage_location = PathBuf::from("unknown_events"); 184 | profile::create_out_directory(&storage_location); 185 | storage_location.push("found_events.dat"); 186 | let mut wtr = csv::Writer::from_file(storage_location).unwrap(); 187 | let r = wtr.encode(("unit", "code", "mask", "event_name")); 188 | assert!(r.is_ok()); 189 | 190 | let mut events = Vec::new(); 191 | for code in 1..255 { 192 | for umask in 1..255 { 193 | for unit in units.iter() { 194 | let id: isize = (*unit as isize) << 32 | (code as isize) << 8 | umask as isize; 195 | 196 | if event_is_documented(&pevents, *unit, code, umask) { 197 | println!("Skip documented event {} {:?} {:?}", unit, code, umask); 198 | continue; 199 | } 200 | 201 | let e = EventDescription::new( 202 | Tuple::One(code), 203 | Tuple::One(umask), 204 | event_names.get(&id).unwrap().as_str(), 205 | "Unknown Event", 206 | None, 207 | Counter::Programmable(15), 208 | None, 209 | None, 210 | 0, 211 | MSRIndex::None, 212 | 0, 213 | false, 214 | 0x0, 215 | false, 216 | false, 217 | false, 218 | PebsType::Regular, 219 | false, 220 | None, 221 | false, 222 | false, 223 | None, 224 | false, 225 | unit.to_intel_event_description(), 226 | None, 227 | false, 228 | false, 229 | false, 230 | 0, 231 | 0, 232 | 0, 233 | 0, 234 | 0, 235 | ); 236 | events.push(e); 237 | } 238 | } 239 | 240 | let storage_location = PathBuf::from("unknown_events"); 241 | let all_found_events = check_events( 242 | &storage_location, 243 | ".", 244 | vec![String::from("sleep"), String::from("1")], 245 | Vec::new(), 246 | Vec::new(), 247 | false, 248 | events.iter().collect(), 249 | ); 250 | for &(ref name, ref unit) in all_found_events.iter() { 251 | let splitted: Vec<&str> = name.split("_").collect(); 252 | let r = wtr.encode(vec![ 253 | unit, 254 | &String::from(splitted[2]), 255 | &String::from(splitted[3]), 256 | name, 257 | ]); 258 | assert!(r.is_ok()); 259 | } 260 | let r = wtr.flush(); 261 | assert!(r.is_ok()); 262 | 263 | events.clear(); 264 | } 265 | } 266 | 
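// A self-contained sketch (a hypothetical helper, not used by autoperf) of the
// `(unit, code, umask)` packing used for the `id` keys in
// `print_unknown_events` above; `i64` is spelled out here for clarity where the
// code above relies on a 64-bit `isize`.
#[cfg(test)]
mod id_packing_sketch {
    fn pack(unit: i64, code: u8, umask: u8) -> i64 {
        (unit << 32) | ((code as i64) << 8) | (umask as i64)
    }

    #[test]
    fn fields_do_not_overlap() {
        let id = pack(3, 0x37, 0x01);
        assert_eq!(id & 0xff, 0x01); // umask lives in the low byte
        assert_eq!((id >> 8) & 0xff, 0x37); // the event code in the next byte
        assert_eq!(id >> 32, 3); // the monitoring unit in the upper bits
    }
}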
-------------------------------------------------------------------------------- /src/stats.rs: -------------------------------------------------------------------------------- 1 | use csv; 2 | use itertools::Itertools; 3 | use phf::Map; 4 | use std::cmp::Ord; 5 | use std::collections::HashMap; 6 | use std::fs::File; 7 | use std::path::Path; 8 | use std::path::PathBuf; 9 | 10 | use x86::perfcnt::intel::events::COUNTER_MAP; 11 | use x86::perfcnt::intel::{EventDescription, Tuple}; 12 | 13 | use super::profile::{MonitoringUnit, PerfEvent}; 14 | use super::util::*; 15 | 16 | type EventMap = Map<&'static str, EventDescription<'static>>; 17 | type ArchitectureMap = HashMap<&'static str, (&'static str, &'static str, &'static str)>; 18 | 19 | /// Saves the event count for all architectures to a file. 20 | fn save_event_counts(key_to_name: &ArchitectureMap, csv_result: &Path) { 21 | let mut writer = csv::Writer::from_file(csv_result).unwrap(); 22 | writer 23 | .encode(&[ 24 | "year", 25 | "architecture", 26 | "core events", 27 | "uncore events", 28 | "counters", 29 | "uncore groups", 30 | ]) 31 | .expect(format!("Can't write {:?} header", csv_result).as_str()); 32 | 33 | for (key, &(name, year, counters)) in key_to_name.iter() { 34 | let events = COUNTER_MAP.get(format!("{}", key).as_str()); 35 | 36 | let counter_groups: Vec<(MonitoringUnit, usize)> = events.map_or(Vec::new(), |uc| { 37 | let mut units: Vec<(MonitoringUnit, PerfEvent)> = Vec::with_capacity(uc.len()); 38 | for ref e in uc.values() { 39 | if e.uncore { 40 | units.push((PerfEvent(&e).unit(), PerfEvent(&e))); 41 | } 42 | } 43 | units.sort_by(|a, b| a.0.cmp(&b.0)); 44 | 45 | let mut counts: Vec<(MonitoringUnit, usize)> = Vec::with_capacity(10); 46 | for (key, group) in &units.into_iter().group_by(|&(unit, _)| unit) { 47 | counts.push((key, group.count())); 48 | } 49 | 50 | counts 51 | }); 52 | 53 | let cc_count = events 54 | .map(|c| { 55 | let filtered: Vec<&EventDescription> = c.values().filter(|e| !e.uncore).collect(); 56 | filtered.len() 57 | }) 58 | .unwrap_or(0); 59 | let uc_count = events 60 | .map(|c| { 61 | let filtered: Vec<&EventDescription> = c.values().filter(|e| e.uncore).collect(); 62 | filtered.len() 63 | }) 64 | .unwrap_or(0); 65 | 66 | let group_string = counter_groups 67 | .into_iter() 68 | .map(|(u, c)| format!("{}:{}", u, c)) 69 | .join(";"); 70 | let cc_count = cc_count.to_string(); 71 | let uc_count = uc_count.to_string(); 72 | 73 | let mut row: Vec<&str> = Vec::new(); 74 | row.push(year); 75 | row.push(name); 76 | row.push(cc_count.as_str()); 77 | row.push(uc_count.as_str()); 78 | row.push(counters); 79 | row.push(group_string.as_str()); 80 | 81 | writer 82 | .encode(&row.as_slice()) 83 | .expect(format!("Can't write for for {:?} file", csv_result).as_str()); 84 | } 85 | } 86 | 87 | /// Given two EventMaps count all the shared (same event name key) events. 88 | fn common_event_names(a: Option<&'static EventMap>, b: Option<&'static EventMap>) -> usize { 89 | if a.is_none() || b.is_none() { 90 | return 0; 91 | } 92 | 93 | let a_map = a.unwrap(); 94 | let b_map = b.unwrap(); 95 | 96 | let mut counter = 0; 97 | for (key, _value) in a_map.entries() { 98 | if b_map.get(key).is_some() { 99 | counter += 1 100 | } 101 | } 102 | 103 | counter 104 | } 105 | 106 | /// Does pairwise comparison of all architectures and saves their shared events to a file. 
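///
/// The resulting CSV holds one row per ordered architecture pair; with
/// illustrative (made-up) counts it looks like:
///
/// ```text
/// arch1,year1,arch2,year2,common events,arch1 events,arch2 events
/// IvyBridge,2012,Haswell,2013,941,1384,1426
/// ```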
107 | fn save_architecture_comparison(key_to_name: &ArchitectureMap, csv_result: &Path) { 108 | let mut writer = csv::Writer::from_file(csv_result) 109 | .expect(format!("Can't write {:?} file", csv_result).as_str()); 110 | writer 111 | .encode(&[ 112 | "arch1", 113 | "year1", 114 | "arch2", 115 | "year2", 116 | "common events", 117 | "arch1 events", 118 | "arch2 events", 119 | ]) 120 | .expect(format!("Can't write {:?} header", csv_result).as_str()); 121 | 122 | for (key1, &(name1, year1, _)) in key_to_name.iter() { 123 | for (key2, &(name2, year2, _)) in key_to_name.iter() { 124 | let events1 = COUNTER_MAP.get(format!("{}", key1).as_str()); 125 | let events2 = COUNTER_MAP.get(format!("{}", key2).as_str()); 126 | 127 | writer 128 | .encode(&[ 129 | name1, 130 | year1, 131 | name2, 132 | year2, 133 | common_event_names(events1, events2).to_string().as_str(), 134 | events1.map(|c| c.len()).unwrap_or(0).to_string().as_str(), 135 | events2.map(|c| c.len()).unwrap_or(0).to_string().as_str(), 136 | ]) 137 | .ok(); 138 | } 139 | } 140 | } 141 | 142 | /// Computes the Levenshtein edit distance of two strings. 143 | fn edit_distance(a: &str, b: &str) -> i32 { 144 | let len_a = a.chars().count(); 145 | let len_b = b.chars().count(); 146 | 147 | let row: Vec = vec![0; len_b + 1]; 148 | let mut matrix: Vec> = vec![row; len_a + 1]; 149 | 150 | let chars_a: Vec = a.to_lowercase().chars().collect(); 151 | let chars_b: Vec = b.to_lowercase().chars().collect(); 152 | 153 | for i in 0..len_a { 154 | matrix[i + 1][0] = (i + 1) as i32; 155 | } 156 | for i in 0..len_b { 157 | matrix[0][i + 1] = (i + 1) as i32; 158 | } 159 | 160 | for i in 0..len_a { 161 | for j in 0..len_b { 162 | let ind: i32 = if chars_a[i] == chars_b[j] { 0 } else { 1 }; 163 | 164 | let min = vec![ 165 | matrix[i][j + 1] + 1, 166 | matrix[i + 1][j] + 1, 167 | matrix[i][j] + ind, 168 | ] 169 | .into_iter() 170 | .min() 171 | .unwrap(); 172 | 173 | matrix[i + 1][j + 1] = if min == 0 { 0 } else { min }; 174 | } 175 | } 176 | matrix[len_a][len_b] 177 | } 178 | 179 | /// Computes the edit distance of the event description for common events shared in 'a' and 'b'. 180 | fn common_event_desc_distance( 181 | writer: &mut csv::Writer, 182 | a: Option<&'static EventMap>, 183 | b: Option<&'static EventMap>, 184 | uncore: bool, 185 | ) -> csv::Result<()> { 186 | if a.is_none() || b.is_none() { 187 | return Ok(()); 188 | } 189 | 190 | let a_map = a.unwrap(); 191 | let b_map = b.unwrap(); 192 | 193 | for (key1, value1) in a_map.entries() { 194 | match b_map.get(key1) { 195 | Some(value2) => { 196 | assert_eq!(value1.event_name, value2.event_name); 197 | let ed = 198 | edit_distance(value1.brief_description, value2.brief_description).to_string(); 199 | let uncore_str = if uncore { "true" } else { "false" }; 200 | 201 | writer.encode(&[ 202 | value1.event_name, 203 | ed.as_str(), 204 | uncore_str, 205 | value1.brief_description, 206 | value2.brief_description, 207 | ])? 208 | } 209 | None => { 210 | // Ignore event names that are not shared in both architectures 211 | } 212 | } 213 | } 214 | 215 | Ok(()) 216 | } 217 | 218 | /// Does a pairwise comparison of all architectures by computing edit distances of shared events. 
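///
/// The `edit_distance` helper above is a plain Levenshtein distance over
/// lower-cased characters, so for instance:
///
/// ```ignore
/// assert_eq!(edit_distance("cache", "caches"), 1); // one insertion
/// assert_eq!(edit_distance("Load", "load"), 0);    // comparison is case-insensitive
/// assert_eq!(edit_distance("store", "stall"), 3);  // three substitutions
/// ```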
219 | fn save_edit_distances(key_to_name: &ArchitectureMap, output_dir: &Path) { 220 | for (key1, &(name1, _, _)) in key_to_name.iter() { 221 | for (key2, &(name2, _, _)) in key_to_name.iter() { 222 | let mut csv_result = output_dir.to_path_buf(); 223 | csv_result.push(format!("editdist_{}-vs-{}.csv", name1, name2)); 224 | 225 | let mut writer = csv::Writer::from_file(csv_result.clone()) 226 | .expect(format!("Can't open {:?}", csv_result).as_str()); 227 | writer 228 | .encode(&["event name", "edit distance", "uncore", "desc1", "desc2"]) 229 | .expect(format!("Can't write {:?} header", csv_result).as_str()); 230 | 231 | let events1 = COUNTER_MAP.get(format!("{}", key1).as_str()); 232 | let events2 = COUNTER_MAP.get(format!("{}", key2).as_str()); 233 | 234 | common_event_desc_distance(&mut writer, events1, events2, false).ok(); 235 | } 236 | } 237 | } 238 | 239 | /// Dump information about performance events on your machine into the given directory. 240 | fn save_event_descriptions(output_path: &Path) { 241 | let events: &'static Map<&'static str, EventDescription<'static>> = 242 | &x86::perfcnt::intel::events().expect("Can't get events for arch"); 243 | let pevents: Vec = events.into_iter().map(|e| PerfEvent(e.1)).collect(); 244 | 245 | let mut storage_location = PathBuf::from(output_path); 246 | storage_location.push("ivytown_events.dat"); 247 | let mut wtr = csv::Writer::from_file(storage_location.clone()) 248 | .expect(format!("Can't open {:?}", storage_location).as_str()); 249 | let r = wtr.encode(("unit", "code", "mask", "event_name")); 250 | assert!(r.is_ok()); 251 | 252 | for event in pevents.iter() { 253 | //println!("{:?}", event.0.event_name); 254 | let unit = event.unit().to_perf_prefix().unwrap_or("none"); 255 | 256 | match (&event.0.event_code, &event.0.umask) { 257 | (&Tuple::One(e1), &Tuple::One(m1)) => { 258 | wtr.encode(vec![ 259 | unit, 260 | &format!("{}", e1), 261 | &format!("{}", m1), 262 | &String::from(event.0.event_name), 263 | ]) 264 | .ok(); 265 | } 266 | (&Tuple::Two(e1, e2), &Tuple::Two(m1, m2)) => { 267 | wtr.encode(vec![ 268 | unit, 269 | &format!("{}", e1), 270 | &format!("{}", m1), 271 | &String::from(event.0.event_name), 272 | ]) 273 | .ok(); 274 | 275 | wtr.encode(vec![ 276 | unit, 277 | &format!("{}", e2), 278 | &format!("{}", m2), 279 | &String::from(event.0.event_name), 280 | ]) 281 | .ok(); 282 | } 283 | (&Tuple::Two(e1, e2), &Tuple::One(m1)) => { 284 | wtr.encode(vec![ 285 | unit, 286 | &format!("{}", e1), 287 | &format!("{}", m1), 288 | &String::from(event.0.event_name), 289 | ]) 290 | .ok(); 291 | 292 | wtr.encode(vec![ 293 | unit, 294 | &format!("{}", e2), 295 | &format!("{}", m1), 296 | &String::from(event.0.event_name), 297 | ]) 298 | .ok(); 299 | } 300 | _ => unreachable!(), 301 | } 302 | } 303 | 304 | let r = wtr.flush(); 305 | assert!(r.is_ok()); 306 | } 307 | 308 | /// Generate all the stats about Intel events and save them to a file. 
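///
/// Concretely, this writes `events.csv`, `architecture_comparison.csv`, one
/// `editdist_<arch1>-vs-<arch2>.csv` file per architecture pair, and
/// `ivytown_events.dat` into `output_path` (the directory is created if it
/// does not exist yet). For example:
///
/// ```ignore
/// stats(Path::new("perf_event_stats")); // any writable output directory
/// ```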
309 | pub fn stats(output_path: &Path) { 310 | mkdir(output_path); 311 | 312 | // TODO: Ideally this should come from x86 crate: x86data/perfmon_data/mapfile.csv 313 | let mut key_to_name = HashMap::new(); 314 | key_to_name.insert("GenuineIntel-6-1C", ("Bonnell", "2008", "4")); 315 | key_to_name.insert("GenuineIntel-6-1E", ("NehalemEP", "2009", "4")); 316 | key_to_name.insert("GenuineIntel-6-2E", ("NehalemEX", "2010", "4")); 317 | key_to_name.insert("GenuineIntel-6-25", ("WestmereEP-SP", "2010", "4")); 318 | key_to_name.insert("GenuineIntel-6-2C", ("WestmereEP-DP", "2010", "4")); 319 | key_to_name.insert("GenuineIntel-6-2F", ("WestmereEX", "2011", "4")); 320 | key_to_name.insert("GenuineIntel-6-2D", ("Jaketown", "2011", "8")); 321 | key_to_name.insert("GenuineIntel-6-2A", ("SandyBridge", "2011", "8")); 322 | key_to_name.insert("GenuineIntel-6-3A", ("IvyBridge", "2012", "8")); 323 | key_to_name.insert("GenuineIntel-6-37", ("Silvermont", "2013", "8")); 324 | key_to_name.insert("GenuineIntel-6-3C", ("Haswell", "2013", "8")); 325 | key_to_name.insert("GenuineIntel-6-3E", ("IvyBridgeEP", "2014", "8")); 326 | key_to_name.insert("GenuineIntel-6-3F", ("HaswellX", "2014", "8")); 327 | key_to_name.insert("GenuineIntel-6-3D", ("Broadwell", "2014", "8")); 328 | key_to_name.insert("GenuineIntel-6-56", ("BroadwellDE", "2015", "8")); 329 | key_to_name.insert("GenuineIntel-6-4E", ("Skylake", "2015", "8")); 330 | key_to_name.insert("GenuineIntel-6-4F", ("BroadwellX", "2016", "8")); 331 | key_to_name.insert("GenuineIntel-6-5C", ("Goldmont", "2016", "8")); 332 | key_to_name.insert("GenuineIntel-6-57", ("KnightsLanding", "2016", "4")); 333 | key_to_name.insert("GenuineIntel-6-55", ("SkylakeX", "2017", "8")); 334 | 335 | let mut csv_result_file = output_path.to_path_buf(); 336 | csv_result_file.push("events.csv"); 337 | save_event_counts(&key_to_name, csv_result_file.as_path()); 338 | 339 | let mut csv_result_file = output_path.to_path_buf(); 340 | csv_result_file.push("architecture_comparison.csv"); 341 | 342 | save_architecture_comparison(&key_to_name, csv_result_file.as_path()); 343 | save_edit_distances(&key_to_name, output_path); 344 | save_event_descriptions(output_path); 345 | } 346 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | 3 | use csv; 4 | use itertools::*; 5 | use log::error as lerror; 6 | use log::*; 7 | use nom::*; 8 | use std::fs; 9 | use std::fs::File; 10 | use std::io; 11 | use std::io::prelude::*; 12 | use std::path::Path; 13 | use std::path::PathBuf; 14 | use std::process::{Command, Output}; 15 | use std::str::{from_utf8_unchecked, FromStr}; 16 | use x86::cpuid; 17 | 18 | pub type Node = u64; 19 | pub type Socket = u64; 20 | pub type Core = u64; 21 | pub type Cpu = u64; 22 | pub type L1 = u64; 23 | pub type L2 = u64; 24 | pub type L3 = u64; 25 | pub type Online = u64; 26 | pub type MHz = u64; 27 | 28 | pub fn mkdir(out_dir: &Path) { 29 | if !out_dir.exists() { 30 | fs::create_dir(out_dir).expect("Can't create directory"); 31 | } 32 | } 33 | 34 | fn to_string(s: &[u8]) -> &str { 35 | unsafe { from_utf8_unchecked(s) } 36 | } 37 | 38 | fn to_u64(s: &str) -> u64 { 39 | FromStr::from_str(s).unwrap() 40 | } 41 | 42 | fn buf_to_u64(s: &[u8]) -> u64 { 43 | to_u64(to_string(s)) 44 | } 45 | 46 | named!(parse_numactl_size<&[u8], NodeInfo>, 47 | chain!( 48 | tag!("node") ~ 49 | take_while!(is_space) ~ 50 | node: take_while!(is_digit) ~ 51 | 
take_while!(is_space) ~ 52 | tag!("size:") ~ 53 | take_while!(is_space) ~ 54 | size: take_while!(is_digit) ~ 55 | take_while!(is_space) ~ 56 | tag!("MB"), 57 | || NodeInfo { node: buf_to_u64(node), memory: buf_to_u64(size) * 1000000 } 58 | ) 59 | ); 60 | 61 | fn get_node_info(node: Node, numactl_output: &String) -> Option { 62 | let find_prefix = format!("node {} size:", node); 63 | for line in numactl_output.split('\n') { 64 | if line.starts_with(find_prefix.as_str()) { 65 | let res = parse_numactl_size(line.as_bytes()); 66 | return Some(res.unwrap().1); 67 | } 68 | } 69 | 70 | None 71 | } 72 | 73 | #[derive(Debug, Eq, PartialEq, RustcEncodable)] 74 | pub struct CpuInfo { 75 | pub node: NodeInfo, 76 | pub socket: Socket, 77 | pub core: Core, 78 | pub cpu: Cpu, 79 | pub l1: L1, 80 | pub l2: L2, 81 | pub l3: L3, 82 | } 83 | 84 | impl CpuInfo { 85 | pub fn cbox(&self, mt: &MachineTopology) -> String { 86 | let cbox = self.core % mt.cores_on_socket(self.socket).len() as u64; 87 | format!("uncore_cbox_{}", cbox) 88 | } 89 | } 90 | 91 | #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone, RustcEncodable)] 92 | pub struct NodeInfo { 93 | pub node: Node, 94 | pub memory: u64, 95 | } 96 | 97 | #[derive(Debug)] 98 | pub struct MachineTopology { 99 | data: Vec, 100 | } 101 | 102 | fn save_file( 103 | cmd: &'static str, 104 | output_path: &Path, 105 | file: &'static str, 106 | out: Output, 107 | ) -> io::Result { 108 | if out.status.success() { 109 | // Save to result directory: 110 | let mut out_file: PathBuf = output_path.to_path_buf(); 111 | out_file.push(file); 112 | let mut f = File::create(out_file.as_path())?; 113 | let content = String::from_utf8(out.stdout).unwrap_or(String::new()); 114 | f.write(content.as_bytes())?; 115 | Ok(content) 116 | } else { 117 | lerror!( 118 | "{} command: got unknown exit status was: {}", 119 | cmd, 120 | out.status 121 | ); 122 | debug!( 123 | "stderr:\n{}", 124 | String::from_utf8(out.stderr).unwrap_or("Can't parse output".to_string()) 125 | ); 126 | unreachable!() 127 | } 128 | } 129 | 130 | pub fn save_lstopo(output_path: &Path) -> io::Result { 131 | let out = Command::new("lstopo") 132 | .arg("--of console") 133 | .arg("--taskset") 134 | .output()?; 135 | save_file("lstopo", output_path, "lstopo.txt", out) 136 | } 137 | 138 | pub fn save_cpuid(output_path: &Path) -> io::Result { 139 | let out = Command::new("cpuid").output()?; 140 | save_file("cpuid", output_path, "cpuid.txt", out) 141 | } 142 | 143 | pub fn save_likwid_topology(output_path: &Path) -> io::Result { 144 | let out = Command::new("likwid-topology") 145 | .arg("-g") 146 | .arg("-c") 147 | .output()?; 148 | save_file("likwid-topology", output_path, "likwid_topology.txt", out) 149 | } 150 | 151 | pub fn save_numa_topology(output_path: &Path) -> io::Result { 152 | let out = Command::new("numactl").arg("--hardware").output()?; 153 | save_file("numactl", output_path, "numactl.dat", out) 154 | } 155 | 156 | pub fn save_cpu_topology(output_path: &Path) -> io::Result { 157 | let out = Command::new("lscpu") 158 | .arg("--parse=NODE,SOCKET,CORE,CPU,CACHE") 159 | .output()?; 160 | save_file("lscpu", output_path, "lscpu.csv", out) 161 | } 162 | 163 | impl MachineTopology { 164 | pub fn new() -> MachineTopology { 165 | let lscpu_out = Command::new("lscpu") 166 | .arg("--parse=NODE,SOCKET,CORE,CPU,CACHE") 167 | .output() 168 | .unwrap(); 169 | let lscpu_string = String::from_utf8(lscpu_out.stdout).unwrap_or(String::new()); 170 | 171 | let numactl_out = 
Command::new("numactl").arg("--hardware").output().unwrap(); 172 | let numactl_string = String::from_utf8(numactl_out.stdout).unwrap_or(String::new()); 173 | 174 | MachineTopology::from_strings(lscpu_string, numactl_string) 175 | } 176 | 177 | pub fn from_files(lcpu_path: &Path, numactl_path: &Path) -> MachineTopology { 178 | let mut file = File::open(lcpu_path).expect("lscpu.csv file does not exist?"); 179 | let mut lscpu_string = String::new(); 180 | let _ = file.read_to_string(&mut lscpu_string).unwrap(); 181 | 182 | let mut file = File::open(numactl_path).expect("numactl.dat file does not exist?"); 183 | let mut numactl_string = String::new(); 184 | let _ = file.read_to_string(&mut numactl_string).unwrap(); 185 | 186 | MachineTopology::from_strings(lscpu_string, numactl_string) 187 | } 188 | 189 | pub fn from_strings(lscpu_output: String, numactl_output: String) -> MachineTopology { 190 | let no_comments: Vec<&str> = lscpu_output 191 | .split('\n') 192 | .filter(|s| s.trim().len() > 0 && !s.trim().starts_with("#")) 193 | .collect(); 194 | 195 | type Row = (Node, Socket, Core, Cpu, String); // Online MHz 196 | let mut rdr = csv::Reader::from_string(no_comments.join("\n")).has_headers(false); 197 | let rows = rdr.decode().collect::>>().unwrap(); 198 | 199 | let mut data: Vec = Vec::with_capacity(rows.len()); 200 | for row in rows { 201 | let caches: Vec = row 202 | .4 203 | .split(":") 204 | .map(|s| u64::from_str(s).unwrap()) 205 | .collect(); 206 | assert_eq!(caches.len(), 4); 207 | let node: NodeInfo = 208 | get_node_info(row.0, &numactl_output).expect("Can't find node in numactl output?"); 209 | let tuple: CpuInfo = CpuInfo { 210 | node: node, 211 | socket: row.1, 212 | core: row.2, 213 | cpu: row.3, 214 | l1: caches[0], 215 | l2: caches[2], 216 | l3: caches[3], 217 | }; 218 | data.push(tuple); 219 | } 220 | 221 | MachineTopology { data: data } 222 | } 223 | 224 | pub fn cpus(&self) -> Vec { 225 | let mut cpus: Vec = self.data.iter().map(|t| t.cpu).collect(); 226 | cpus.sort(); 227 | cpus.dedup(); 228 | cpus 229 | } 230 | 231 | pub fn cpu(&self, cpu: Cpu) -> Option<&CpuInfo> { 232 | self.data.iter().find(|t| t.cpu == cpu) 233 | } 234 | 235 | pub fn cores(&self) -> Vec { 236 | let mut cores: Vec = self.data.iter().map(|t| t.core).collect(); 237 | cores.sort(); 238 | cores.dedup(); 239 | cores 240 | } 241 | 242 | pub fn sockets(&self) -> Vec { 243 | let mut sockets: Vec = self.data.iter().map(|t| t.socket).collect(); 244 | sockets.sort(); 245 | sockets.dedup(); 246 | sockets 247 | } 248 | 249 | pub fn nodes(&self) -> Vec { 250 | let mut nodes: Vec = self.data.iter().map(|t| t.node).collect(); 251 | nodes.sort(); 252 | nodes.dedup(); 253 | nodes 254 | } 255 | 256 | pub fn max_memory(&self) -> u64 { 257 | self.nodes().iter().map(|t| t.memory).sum() 258 | } 259 | 260 | pub fn l1(&self) -> Vec { 261 | let mut l1: Vec = self.data.iter().map(|t| t.l1).collect(); 262 | l1.sort(); 263 | l1.dedup(); 264 | l1 265 | } 266 | 267 | pub fn l1_size(&self) -> Option { 268 | let cpuid = cpuid::CpuId::new(); 269 | cpuid.get_cache_parameters().map(|mut cparams| { 270 | let cache = cparams 271 | .find(|c| c.level() == 1 && c.cache_type() == cpuid::CacheType::Data) 272 | .unwrap(); 273 | (cache.associativity() 274 | * cache.physical_line_partitions() 275 | * cache.coherency_line_size() 276 | * cache.sets()) as u64 277 | }) 278 | } 279 | 280 | pub fn l2(&self) -> Vec { 281 | let mut l2: Vec = self.data.iter().map(|t| t.l2).collect(); 282 | l2.sort(); 283 | l2.dedup(); 284 | l2 285 | } 286 | 287 | pub fn 
267 |     pub fn l1_size(&self) -> Option<u64> {
268 |         let cpuid = cpuid::CpuId::new();
269 |         cpuid.get_cache_parameters().map(|mut cparams| {
270 |             let cache = cparams
271 |                 .find(|c| c.level() == 1 && c.cache_type() == cpuid::CacheType::Data)
272 |                 .unwrap();
273 |             (cache.associativity()
274 |                 * cache.physical_line_partitions()
275 |                 * cache.coherency_line_size()
276 |                 * cache.sets()) as u64
277 |         })
278 |     }
279 | 
280 |     pub fn l2(&self) -> Vec<L2> {
281 |         let mut l2: Vec<L2> = self.data.iter().map(|t| t.l2).collect();
282 |         l2.sort();
283 |         l2.dedup();
284 |         l2
285 |     }
286 | 
287 |     pub fn l2_size(&self) -> Option<u64> {
288 |         let cpuid = cpuid::CpuId::new();
289 |         cpuid.get_cache_parameters().map(|mut cparams| {
290 |             let cache = cparams
291 |                 .find(|c| c.level() == 2 && c.cache_type() == cpuid::CacheType::Unified)
292 |                 .unwrap();
293 |             (cache.associativity()
294 |                 * cache.physical_line_partitions()
295 |                 * cache.coherency_line_size()
296 |                 * cache.sets()) as u64
297 |         })
298 |     }
299 | 
300 |     pub fn l3(&self) -> Vec<L3> {
301 |         let mut l3: Vec<L3> = self.data.iter().map(|t| t.l3).collect();
302 |         l3.sort();
303 |         l3.dedup();
304 |         l3
305 |     }
306 | 
307 |     pub fn l3_size(&self) -> Option<u64> {
308 |         let cpuid = cpuid::CpuId::new();
309 |         cpuid.get_cache_parameters().map(|mut cparams| {
310 |             let cache = cparams
311 |                 .find(|c| c.level() == 3 && c.cache_type() == cpuid::CacheType::Unified)
312 |                 .unwrap();
313 |             (cache.associativity()
314 |                 * cache.physical_line_partitions()
315 |                 * cache.coherency_line_size()
316 |                 * cache.sets()) as u64
317 |         })
318 |     }
319 | 
320 |     pub fn cpus_on_node(&self, node: NodeInfo) -> Vec<&CpuInfo> {
321 |         self.data.iter().filter(|t| t.node == node).collect()
322 |     }
323 | 
324 |     pub fn cpus_on_l1(&self, l1: L1) -> Vec<&CpuInfo> {
325 |         self.data.iter().filter(|t| t.l1 == l1).collect()
326 |     }
327 | 
328 |     pub fn cpus_on_l2(&self, l2: L2) -> Vec<&CpuInfo> {
329 |         self.data.iter().filter(|t| t.l2 == l2).collect()
330 |     }
331 | 
332 |     pub fn cpus_on_l3(&self, l3: L3) -> Vec<&CpuInfo> {
333 |         self.data.iter().filter(|t| t.l3 == l3).collect()
334 |     }
335 | 
336 |     pub fn cpus_on_core(&self, core: Core) -> Vec<&CpuInfo> {
337 |         self.data.iter().filter(|t| t.core == core).collect()
338 |     }
339 | 
340 |     pub fn cpus_on_socket(&self, socket: Socket) -> Vec<&CpuInfo> {
341 |         self.data.iter().filter(|t| t.socket == socket).collect()
342 |     }
343 | 
344 |     fn cores_on_socket(&self, socket: Socket) -> Vec<Core> {
345 |         let mut cores: Vec<Core> = self
346 |             .data
347 |             .iter()
348 |             .filter(|c| c.socket == socket)
349 |             .map(|c| c.core)
350 |             .collect();
351 |         cores.sort();
352 |         cores.dedup();
353 |         cores
354 |     }
355 | 
356 |     fn cores_on_l3(&self, l3: L3) -> Vec<&CpuInfo> {
357 |         let mut cpus: Vec<&CpuInfo> = self.data.iter().filter(|t| t.l3 == l3).collect();
358 |         cpus.sort_by_key(|c| c.core);
359 |         // TODO: implicit assumption that we have two HTs
360 |         cpus.into_iter().step(2).collect()
361 |     }
362 | 
363 |     pub fn same_socket(&self) -> Vec<Vec<&CpuInfo>> {
364 |         self.sockets()
365 |             .into_iter()
366 |             .map(|s| self.cpus_on_socket(s))
367 |             .collect()
368 |     }
369 | 
370 |     pub fn same_core(&self) -> Vec<Vec<&CpuInfo>> {
371 |         self.cores()
372 |             .into_iter()
373 |             .map(|c| self.cpus_on_core(c))
374 |             .collect()
375 |     }
376 | 
377 |     pub fn same_node(&self) -> Vec<Vec<&CpuInfo>> {
378 |         self.nodes()
379 |             .into_iter()
380 |             .map(|c| self.cpus_on_node(c))
381 |             .collect()
382 |     }
383 | 
384 |     pub fn same_l1(&self) -> Vec<Vec<&CpuInfo>> {
385 |         self.l1().into_iter().map(|c| self.cpus_on_l1(c)).collect()
386 |     }
387 | 
388 |     pub fn same_l2(&self) -> Vec<Vec<&CpuInfo>> {
389 |         self.l2().into_iter().map(|c| self.cpus_on_l2(c)).collect()
390 |     }
391 | 
392 |     pub fn same_l3(&self) -> Vec<Vec<&CpuInfo>> {
393 |         self.l3().into_iter().map(|c| self.cpus_on_l3(c)).collect()
394 |     }
395 | 
396 |     pub fn same_l3_cores(&self) -> Vec<Vec<&CpuInfo>> {
397 |         self.l3()
398 |             .into_iter()
399 |             .map(|l3| self.cores_on_l3(l3))
400 |             .collect()
401 |     }
402 | 
403 |     pub fn whole_machine(&self) -> Vec<Vec<&CpuInfo>> {
404 |         vec![self.data.iter().collect()]
405 |     }
406 | 
407 |     pub fn whole_machine_cores(&self) -> Vec<Vec<&CpuInfo>> {
408 |         let mut cpus: Vec<&CpuInfo> = self.data.iter().collect();
409 |         cpus.sort_by_key(|c| c.core);
410 |         // TODO: implicit assumption that we have two HTs
411 |         vec![cpus.into_iter().step(2).collect()]
412 |     }
413 | }
414 | 
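// The device names below match the per-socket uncore PMUs that Linux perf
// exposes under /sys/bus/event_source/devices/ on Intel Xeon E5-class parts
// (home agent, IMC channels, power control unit, R2PCIe/R3QPI ring bridges,
// and the UBox); the per-core CBo PMUs are derived separately via
// `CpuInfo::cbox()`.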
415 | // TODO: Should ideally be generic:
416 | pub fn socket_uncore_devices() -> Vec<&'static str> {
417 |     vec![
418 |         "uncore_ha_0",
419 |         "uncore_imc_0",
420 |         "uncore_imc_1",
421 |         "uncore_imc_2",
422 |         "uncore_imc_3",
423 |         "uncore_pcu",
424 |         "uncore_r2pcie",
425 |         "uncore_r3qpi_0",
426 |         "uncore_r3qpi_1",
427 |         "uncore_ubox",
428 |     ]
429 | }
430 | 
--------------------------------------------------------------------------------
/tests/pair/manifest.toml:
--------------------------------------------------------------------------------
1 | [experiment]
2 | # configurations: [e \in { "L1-SMT", "L3-SMT", "L3-SMT-cores", "L3-cores", "Full-L3", "Full-SMT-L3", "Full-cores", "Full-SMT-cores" }]
3 | # Specifies a series of different affinity mappings for the programs
4 | configurations = ["L3-SMT", "L3-SMT-cores"]
5 | # alone: bool
6 | # Also run and profile programs in isolation
7 | alone = true
8 | # profile_only_a = ["prog_name1", "prog_name2", ...]
9 | # Limit protagonist programs (programs being profiled) to the specified subset
10 | profile_only_a = ["echo", "p2"]
11 | # profile_only_b = ["prog_name1", "prog_name2", ...]
12 | # Limit antagonist programs to the specified subset
13 | profile_only_b = ["echo", "p2"]
14 | 
15 | [program1]
16 | # name: String
17 | # Program name
18 | name = "echo"
19 | # binary: String
20 | # Program binary path to invoke
21 | binary = "echo"
22 | # default_working_dir: String
23 | # Sets the working dir of the program to the following path
24 | default_working_dir = "/tmp"
25 | # arguments: [String]
26 | # Passes the following arguments to the program
27 | arguments = ["a", "b", "c"]
28 | # antagonist_arguments: [String]
29 | # When run as an antagonist, use the following arguments instead
30 | antagonist_arguments = ["a", "b", "c"]
31 | # breakpoints: [String, String]
32 | # Set the following two breakpoints with perf (used to identify critical sections for measurements)
33 | breakpoints = ["0x123", "0x123"]
34 | # env: { String -> String }
35 | # Augment the program environment with the following key-value pairs
36 | env = { KEY = "value" }
37 | # use_watch_repeat: bool
38 | # When run as an antagonist, use watch to repeatedly run the program (while the protagonist is running)
39 | use_watch_repeat = false
40 | # alone: bool
41 | # Don't run this program alone
42 | alone = false
43 | 
44 | # The following features are deprecated and will be removed:
45 | # openmp = false
46 | # parsec = false
47 | # checkpoints = ["0x123", "0x123"]
48 | 
--------------------------------------------------------------------------------
/tests/test_readme.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/bash
2 | # We run all commands of the README.md file and hope it works
3 | set -ex
4 | export RUST_BACKTRACE=1
5 | export RUST_LOG='autoperf=trace'
6 | 
7 | sudo apt-get update
8 | sudo apt-get install likwid cpuid hwloc numactl util-linux
9 | 
10 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
11 | source $HOME/.cargo/env
12 | 
13 | git clone https://github.com/gz/autoperf.git
14 | 
15 | cd autoperf
16 | cargo build --release
17 | ./target/release/autoperf --help
18 | 
19 | ls /sys/bus/event_source/devices/
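# Relax kernel settings that otherwise restrict perf: kptr_restrict=0 exposes
# kernel symbol addresses, disabling the NMI watchdog frees up a hardware
# performance counter, and perf_event_paranoid=-1 allows measuring all events
# without additional privileges.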
20 | sudo sh -c 'echo 0 >> /proc/sys/kernel/kptr_restrict'
21 | sudo sh -c 'echo 0 > /proc/sys/kernel/nmi_watchdog' || true # This fails on travis!
22 | sudo sh -c 'echo -1 > /proc/sys/kernel/perf_event_paranoid'
23 | 
24 | cargo run --release -- stats stats_out
25 | cargo run --release -- profile -d echo test
26 | 
27 | mkdir pairings
28 | cat <<EOT >> pairings/manifest.toml
29 | [experiment]
30 | configurations = ["L3-SMT", "L3-SMT-cores"]
31 | 
32 | [programA]
33 | name = "gcc"
34 | binary = "gcc"
35 | arguments = ["-j", "4", "out.c", "-o", "out"]
36 | 
37 | [programB]
38 | name = "objdump"
39 | binary = "objdump"
40 | arguments = ["--disassemble", "/bin/true"]
41 | 
42 | [programC]
43 | name = "cat"
44 | binary = "cat"
45 | arguments = ["/var/log/messages"]
46 | env = { LC_ALL = "C" }
47 | use_watch_repeat = true
48 | EOT
49 | cargo run --release -- pair -d ./pairings
50 | 
51 | 
--------------------------------------------------------------------------------