├── .gitignore ├── src ├── parse │ ├── mod.rs │ ├── callgrind.rs │ └── cachegrind.rs ├── profiler.rs ├── argparse.rs ├── cargo.rs ├── main.rs ├── display.rs └── err.rs ├── .github └── dependabot.yml ├── .travis.yml ├── Cargo.toml ├── CHANGELOG.md ├── LICENSE ├── README.md └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | target/* 3 | -------------------------------------------------------------------------------- /src/parse/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cachegrind; 2 | pub mod callgrind; 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | sudo: false 4 | 5 | rust: 6 | - nightly 7 | 8 | script: 9 | - | 10 | cargo build 11 | 12 | notifications: 13 | email: 14 | - sgrangan01@gmail.com 15 | 16 | branches: 17 | only: master 18 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cargo-profiler" 3 | version = "0.2.0" 4 | authors = ["pegasos1", "Sven-Hendrik Haase "] 5 | description = "Cargo subcommand to profile your applications." 
6 | homepage = "http://github.com/svenstaro/cargo-profiler" 7 | repository = "http://github.com/svenstaro/cargo-profiler" 8 | license = "MIT" 9 | keywords = ["cargo", "profiler", "subcommand", "valgrind", "performance"] 10 | edition = "2018" 11 | 12 | [dependencies] 13 | clap = "2.5.1" 14 | regex = "1" 15 | ndarray = "0.13" 16 | lazy_static = "1" 17 | serde = { version = "1", features = ["derive"] } 18 | serde_json = "1" 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.2.0 (unreleased) 2 | - Crate transferred to [@svenstaro](https://github.com/svenstaro) 3 | - Upgraded crate to Rust 2018 4 | - Modernized all dependencies 5 | - Fixed a ton of clippy lints 6 | 7 | ## 0.1.6 8 | - Arguments to binary can be supplied. 9 | 10 | ## 0.1.5 11 | - Bug fix 12 | 13 | ## 0.1.4 14 | - Detects invocation outside of rust project. 15 | - Propagates valgrind memory error up to cargo profiler. 16 | - Moves process exit/print error function to main.rs so we can create unit tests for underlying functions. 17 | - This is achieved by a `try_or_exit` macro in main.rs. 18 | - Generally better error handling with result/option combinators (e.g. and_then, ok_or, etc.). 19 | - Unit tests initialized in each submodule. 20 | 21 | ## 0.1.3 22 | - cargo better integrated. 23 | - No longer have to specify binary if in rust project w/ cargo.toml. 24 | - better error messages and exits (e.g. upon compilation errors). 
25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Suchin and Sven-Hendrik Haase 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/profiler.rs: -------------------------------------------------------------------------------- 1 | use ndarray::Array2; 2 | use std::f64; 3 | 4 | // Profiler enum. We have two profilers: CacheGrind and CallGrind. 
5 | pub enum Profiler { 6 | // CachGrind holds the parsed objects of 7 | // `valgrind --tool=cachegrind -cachegrind-out-file=cachegrind.out 8 | // && cg_annotate cachegrind.out` 9 | CacheGrind { 10 | ir: f64, 11 | i1mr: f64, 12 | ilmr: f64, 13 | dr: f64, 14 | d1mr: f64, 15 | dlmr: f64, 16 | dw: f64, 17 | d1mw: f64, 18 | dlmw: f64, 19 | data: Array2, 20 | functs: Vec, 21 | }, 22 | 23 | // Call holds the parsed objects of 24 | // `valgrind --tool=callgrind --callgrind-out-file=callgrind.out 25 | // && callgrind_annotate callgrind.out` 26 | CallGrind { 27 | total_instructions: f64, 28 | instructions: Vec, 29 | functs: Vec, 30 | }, 31 | } 32 | 33 | // Initialize the Profilers 34 | impl Profiler { 35 | // Initialize CacheGrind 36 | 37 | pub fn new_cachegrind() -> Profiler { 38 | Profiler::CacheGrind { 39 | // total instructions 40 | ir: f64::NAN, 41 | // total instruction-cache read misses 42 | i1mr: f64::NAN, 43 | // total LL-cache read misses 44 | ilmr: f64::NAN, 45 | // total reads 46 | dr: f64::NAN, 47 | // total data-cache read misses 48 | d1mr: f64::NAN, 49 | // total LL-cache read misses 50 | dlmr: f64::NAN, 51 | // total data-cache writes 52 | dw: f64::NAN, 53 | // total data-cache write-misses 54 | d1mw: f64::NAN, 55 | // total LL cache write misses 56 | dlmw: f64::NAN, 57 | // profiler data 58 | data: Array2::zeros((2, 2)), 59 | // profiled functions in binary 60 | functs: Vec::new(), 61 | } 62 | } 63 | // Initialize CallGrind 64 | pub fn new_callgrind() -> Profiler { 65 | Profiler::CallGrind { 66 | // total instruction calls 67 | total_instructions: f64::NAN, 68 | // instruction data 69 | instructions: Vec::new(), 70 | // profiled functions in binary 71 | functs: Vec::new(), 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/argparse.rs: -------------------------------------------------------------------------------- 1 | use crate::err::ProfError; 2 | use crate::parse::cachegrind::Metric; 3 | use 
crate::profiler::Profiler; 4 | use clap::ArgMatches; 5 | use std::path::Path; 6 | 7 | /// match the profiler argument 8 | pub fn get_profiler<'a>( 9 | matches: &'a ArgMatches, 10 | ) -> Result<(&'a ArgMatches<'a>, Profiler), ProfError> { 11 | match matches.subcommand_matches("profiler") { 12 | Some(matches) => match matches.subcommand_matches("callgrind") { 13 | Some(matches) => Ok((matches, Profiler::new_callgrind())), 14 | None => match matches.subcommand_matches("cachegrind") { 15 | Some(matches) => Ok((matches, Profiler::new_cachegrind())), 16 | None => Err(ProfError::InvalidProfiler), 17 | }, 18 | }, 19 | None => Err(ProfError::InvalidProfiler), 20 | } 21 | } 22 | 23 | /// match the binary argument 24 | pub fn get_binary<'a>(matches: &'a ArgMatches) -> Result<&'a str, ProfError> { 25 | // read binary argument, make sure it exists in the filesystem 26 | match matches.value_of("binary") { 27 | Some(z) => { 28 | if !Path::new(z).exists() { 29 | return Err(ProfError::InvalidBinary); 30 | } 31 | Ok(z) 32 | } 33 | None => Err(ProfError::InvalidBinary), 34 | } 35 | } 36 | 37 | /// parse the number argument into a usize 38 | pub fn get_num(matches: &ArgMatches) -> Result { 39 | match matches.value_of("n").map(|x| x.parse::()) { 40 | Some(Ok(z)) => Ok(z), 41 | Some(Err(_)) => Err(ProfError::InvalidNum), 42 | None => Ok(10000), // some arbitrarily large number... 
43 | } 44 | } 45 | 46 | /// get the cachegrind metric user wants to sort on 47 | pub fn get_sort_metric(matches: &ArgMatches) -> Result { 48 | match matches.value_of("sort") { 49 | Some("ir") => Ok(Metric::Ir), 50 | Some("i1mr") => Ok(Metric::I1mr), 51 | Some("ilmr") => Ok(Metric::ILmr), 52 | Some("dr") => Ok(Metric::Dr), 53 | Some("d1mr") => Ok(Metric::D1mr), 54 | Some("dlmr") => Ok(Metric::DLmr), 55 | Some("dw") => Ok(Metric::Dw), 56 | Some("d1mw") => Ok(Metric::D1mw), 57 | Some("dlmw") => Ok(Metric::DLmw), 58 | None => Ok(Metric::NAN), 59 | _ => Err(ProfError::InvalidSortMetric), 60 | } 61 | } 62 | 63 | #[cfg(test)] 64 | mod test { 65 | #[test] 66 | fn test_get_profiler() { 67 | assert_eq!(1, 1); 68 | } 69 | 70 | #[test] 71 | fn test_get_binary() { 72 | assert_eq!(1, 1); 73 | assert_eq!(1, 1); 74 | } 75 | 76 | #[test] 77 | fn test_get_num() { 78 | assert_eq!(1, 1); 79 | } 80 | 81 | #[test] 82 | fn test_get_sort_metric() { 83 | assert_eq!(1, 1); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/cargo.rs: -------------------------------------------------------------------------------- 1 | use crate::err::ProfError; 2 | use serde_json::Value; 3 | use std::env; 4 | use std::fs; 5 | use std::path::Path; 6 | use std::path::PathBuf; 7 | use std::process::Command; 8 | 9 | /// Returns the closest ancestor path containing a `target` directory. 10 | /// 11 | /// Returns `None` if no ancestor path contains a `target` directory, or if 12 | /// the limit of MAX_ANCESTORS ancestors has been reached. 
13 | pub fn find_target() -> Option { 14 | /// Checks if the directory contains `Cargo.toml` 15 | fn contains_manifest(path: &PathBuf) -> bool { 16 | fs::read_dir(path) 17 | .map(|entries| { 18 | entries 19 | .filter_map(|res| res.ok()) 20 | .any(|ent| ent.path().ends_with("target")) 21 | }) 22 | .unwrap_or(false) 23 | } 24 | 25 | // From the current directory we work our way up, looking for `Cargo.toml` 26 | env::current_dir().ok().and_then(|mut wd| { 27 | for _ in 0..10 { 28 | if contains_manifest(&wd) { 29 | return Some(wd); 30 | } 31 | if !wd.pop() { 32 | break; 33 | } 34 | } 35 | 36 | None 37 | }) 38 | } 39 | 40 | // returns the name of the package parsed from Cargo.toml 41 | // this will only work if the package name is directly underneath [package] tag 42 | pub fn get_package_name() -> Result { 43 | let manifest = Command::new("cargo") 44 | .arg("read-manifest") 45 | .output() 46 | .unwrap_or_else(|e| panic!("failed to execute process: {}", e)); 47 | 48 | let out = String::from_utf8(manifest.stdout).unwrap_or_default(); 49 | let data: Value = serde_json::from_str(&out).or(Err(ProfError::ReadManifestError))?; 50 | 51 | data.as_object() 52 | .expect("Could not extract object from read manifest JSON. 
Please submit bug.") 53 | .get("name") 54 | .ok_or(ProfError::NoNameError) 55 | .and_then(|x| Ok(x.to_string().replace("\"", ""))) 56 | } 57 | 58 | // build the binary by calling cargo build 59 | // return the path to the built binary 60 | pub fn build_binary(release: bool) -> Result { 61 | let package_name = get_package_name()?; 62 | 63 | let (out, binary_dir) = if release { 64 | println!( 65 | "\n\x1b[1;33mCompiling \x1b[1;0m{} in release mode...", 66 | package_name 67 | ); 68 | 69 | ( 70 | Command::new("cargo") 71 | .args(&["build", "--release"]) 72 | .output() 73 | .unwrap_or_else(|e| panic!("failed to execute process: {}", e)), 74 | "/target/release/", 75 | ) 76 | } else { 77 | println!( 78 | "\n\x1b[1;33mCompiling \x1b[1;0m{} in debug mode...", 79 | package_name 80 | ); 81 | 82 | ( 83 | Command::new("cargo") 84 | .arg("build") 85 | .output() 86 | .unwrap_or_else(|e| panic!("failed to execute process: {}", e)), 87 | "/target/debug/", 88 | ) 89 | }; 90 | 91 | let target_dir = find_target() 92 | .ok_or(ProfError::NoTargetDirectory) 93 | .and_then(|x| { 94 | Ok(x.to_str() 95 | .expect("target directory could not be converted to string.") 96 | .to_string()) 97 | }); 98 | let path = target_dir 99 | .and_then(|x| Ok(x + binary_dir + &package_name)) 100 | .unwrap_or_default(); 101 | 102 | if !Path::new(&path).exists() { 103 | return Err(ProfError::CompilationError( 104 | package_name.to_string(), 105 | String::from_utf8(out.stderr).unwrap_or_default(), 106 | )); 107 | } 108 | Ok(path) 109 | } 110 | 111 | #[cfg(test)] 112 | mod test { 113 | #[test] 114 | fn test_find_target() { 115 | assert_eq!(1, 1); 116 | } 117 | 118 | #[test] 119 | fn test_get_package_name() { 120 | assert_eq!(1, 1); 121 | assert_eq!(1, 1); 122 | } 123 | 124 | #[test] 125 | fn test_build_binary() { 126 | assert_eq!(1, 1); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/parse/callgrind.rs: 
-------------------------------------------------------------------------------- 1 | use crate::err::ProfError; 2 | use crate::profiler::Profiler; 3 | use lazy_static::lazy_static; 4 | use regex::Regex; 5 | use std::ffi::OsStr; 6 | use std::process::Command; 7 | 8 | // Parser trait. To parse the output of Profilers, we first have to get their output from 9 | // the command line, and then parse the output into respective structs. 10 | pub trait CallGrindParser { 11 | fn callgrind_cli(&self, binary: &str, binargs: &[&OsStr]) -> Result; 12 | fn callgrind_parse<'b>(&'b self, output: &'b str, num: usize) -> Result; 13 | } 14 | 15 | impl CallGrindParser for Profiler { 16 | // Get profiler output from stdout. 17 | fn callgrind_cli(&self, binary: &str, binargs: &[&OsStr]) -> Result { 18 | // get callgrind cli output from stdout 19 | Command::new("valgrind") 20 | .arg("--tool=callgrind") 21 | .arg("--callgrind-out-file=callgrind.out") 22 | .arg(binary) 23 | .args(binargs) 24 | .output() 25 | .unwrap_or_else(|e| panic!("failed to execute process: {}", e)); 26 | 27 | let cachegrind_output = Command::new("callgrind_annotate") 28 | .arg("callgrind.out") 29 | .arg(binary) 30 | .output() 31 | .unwrap_or_else(|e| panic!("failed to execute process: {}", e)); 32 | 33 | Ok(String::from_utf8(cachegrind_output.stdout) 34 | .expect("error while returning cachegrind stdout")) 35 | } 36 | 37 | fn callgrind_parse<'b>(&'b self, output: &'b str, num: usize) -> Result { 38 | // split output line-by-line 39 | let mut out_split = output.split('\n').collect::>(); 40 | 41 | // regex identifies lines that start with digits and have characters that commonly 42 | // show up in file paths 43 | lazy_static! 
{ 44 | static ref CALLGRIND_REGEX: Regex = 45 | Regex::new(r"\d+\s*[a-zA-Z]*$*_*:*/+\.*@*-*|\d+\s*[a-zA-Z]*$*_*\?+:*/*\.*-*@*-*") 46 | .unwrap(); 47 | static ref COMPILER_TRASH: Regex = Regex::new(r"\$\w{2}\$|\$\w{3}\$").unwrap(); 48 | static ref ERROR_REGEX: Regex = Regex::new(r"out of memory").unwrap(); 49 | } 50 | let errs = out_split 51 | .to_owned() 52 | .into_iter() 53 | .filter(|x| ERROR_REGEX.is_match(x)) 54 | .collect::>(); 55 | if !errs.is_empty() { 56 | return Err(ProfError::OutOfMemoryError); 57 | } 58 | 59 | out_split.retain(|x| CALLGRIND_REGEX.is_match(x)); 60 | 61 | let mut funcs: Vec = Vec::new(); 62 | let mut data_vec: Vec = Vec::new(); 63 | // loop through each line and get numbers + func 64 | for sample in out_split.iter() { 65 | // trim the sample, split by whitespace to separate out each data point 66 | // (numbers + func) 67 | let elems = sample.trim().split(" ").collect::>(); 68 | 69 | // for each number, remove any commas and parse into f64. the last element in 70 | // data_elems is the function file path. 71 | 72 | let data_row = match elems[0].trim().replace(",", "").parse::() { 73 | Ok(rep) => rep, 74 | Err(_) => return Err(ProfError::RegexError), 75 | }; 76 | 77 | data_vec.push(data_row); 78 | 79 | // the function has some trailing whitespace and trash. remove that, and 80 | // get the function, push to functs vector. 81 | let path = elems[1].split(' ').collect::>(); 82 | let cleaned_path = path[0].split('/').collect::>(); 83 | let func = cleaned_path[cleaned_path.len() - 1]; 84 | let mut func = COMPILER_TRASH.replace_all(func, ".."); 85 | let idx = func.rfind("::").unwrap_or_else(|| func.len()); 86 | func.to_mut().drain(idx..).collect::(); 87 | funcs.push(func.into_owned()) 88 | } 89 | 90 | // get the total instructions by summing the data vector. 91 | let total_instructions = data_vec.iter().fold(0.0, |a, b| a + b); 92 | 93 | // parse the limit argument n, and take the first n values of data/funcs vectors 94 | // accordingly. 
95 | 96 | if num < data_vec.len() { 97 | data_vec = data_vec.iter().take(num).cloned().collect(); 98 | funcs = funcs.iter().take(num).cloned().collect(); 99 | } 100 | // put all data in cachegrind struct! 101 | Ok(Profiler::CallGrind { 102 | total_instructions, 103 | instructions: data_vec, 104 | functs: funcs, 105 | }) 106 | } 107 | } 108 | 109 | #[cfg(test)] 110 | mod test { 111 | use super::CallGrindParser; 112 | use crate::profiler::Profiler; 113 | #[test] 114 | fn test_callgrind_parse_1() { 115 | let output = "==6072== Valgrind's memory management: out of memory:\n ==6072== \ 116 | Whatever the reason, Valgrind cannot continue. Sorry."; 117 | let num = 10; 118 | let profiler = Profiler::new_callgrind(); 119 | let is_err = profiler.callgrind_parse(&output, num).is_err(); 120 | assert!(is_err && true) 121 | } 122 | 123 | #[test] 124 | fn test_callgrind_parse_2() { 125 | assert_eq!(1, 1); 126 | assert_eq!(1, 1); 127 | } 128 | 129 | #[test] 130 | fn test_callgrind_parse_3() { 131 | assert_eq!(1, 1); 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cargo-profiler 2 | 3 | [![Build Status](https://travis-ci.org/svenstaro/cargo-profiler.svg?branch=master)](https://travis-ci.org/svenstaro/cargo-profiler) 4 | [![Crates.io](https://img.shields.io/crates/v/cargo-profiler.svg)](https://crates.io/crates/cargo-profiler) 5 | [![license](http://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/svenstaro/cargo-profiler/blob/master/LICENSE) 6 | 7 | **Cargo subcommand to profile binaries** 8 | 9 | ## To install 10 | 11 | NOTE: This subcommand can only be used on Linux machines. 12 | 13 | First install valgrind: 14 | 15 | ``` 16 | $ sudo apt-get install valgrind 17 | ``` 18 | 19 | Then you can install `cargo-profiler` via `cargo install`. 
20 | ``` 21 | $ cargo install cargo-profiler 22 | ``` 23 | 24 | Alternatively, you can clone this repo and build the binary from the source. 25 | 26 | ``` 27 | $ cargo build --release 28 | ``` 29 | 30 | Now, copy the built binary to the same directory as cargo. 31 | 32 | ``` 33 | $ sudo cp ./target/release/cargo-profiler $(dirname $(which cargo))/ 34 | ``` 35 | 36 | ## To run 37 | 38 | Cargo profiler currently supports callgrind and cachegrind. 39 | 40 | You can call cargo profiler anywhere in a rust project directory with a `Cargo.toml`. 41 | 42 | ``` 43 | $ cargo profiler callgrind 44 | $ cargo profiler cachegrind --release 45 | ``` 46 | 47 | You can also specify a binary directly: 48 | 49 | ``` 50 | $ cargo profiler callgrind --bin $PATH_TO_BINARY 51 | ``` 52 | 53 | To specify command line arguments to the executable being profiled, append them 54 | after a `--`: 55 | 56 | ``` 57 | $ cargo profiler callgrind --bin $PATH_TO_BINARY -- -a 3 --like this 58 | ``` 59 | 60 | You can choose to keep the callgrind/cachegrind output files using the `--keep` option: 61 | 62 | ``` 63 | $ cargo profiler callgrind --keep 64 | ``` 65 | 66 | You can limit the number of functions you'd like to look at: 67 | 68 | ``` 69 | $ cargo profiler callgrind --bin ./target/debug/rsmat -n 10 70 | 71 | Profiling rsmat with callgrind... 
72 | 73 | Total Instructions...198,466,456 74 | 75 | 78,346,775 (39.5%) dgemm_kernel.rs:matrixmultiply::gemm::masked_kernel 76 | ----------------------------------------------------------------------- 77 | 23,528,320 (11.9%) iter.rs:_..std..ops..Range..A....as..std..iter..Iterator..::next 78 | ----------------------------------------------------------------------- 79 | 16,824,925 (8.5%) loopmacros.rs:matrixmultiply::gemm::masked_kernel 80 | ----------------------------------------------------------------------- 81 | 10,236,864 (5.2%) mem.rs:core::mem::swap 82 | ----------------------------------------------------------------------- 83 | 7,712,846 (3.9%) memset.S:memset 84 | ----------------------------------------------------------------------- 85 | 7,197,344 (3.6%) ???:core::cmp::impls::_..impl..cmp..PartialOrd..for..usize..::lt 86 | ----------------------------------------------------------------------- 87 | 6,979,680 (3.5%) ops.rs:_..usize..as..ops..Add..::add 88 | ----------------------------------------------------------------------- 89 | 90 | ``` 91 | 92 | With cachegrind, you can also sort the data by a particular metric column: 93 | 94 | ``` 95 | $ cargo profiler cachegrind --bin ./target/debug/rsmat -n 10 --sort dr 96 | 97 | Profiling rsmat with cachegrind... 
98 | 99 | Total Memory Accesses...320,385,356 100 | 101 | Total L1 I-Cache Misses...371 (0%) 102 | Total LL I-Cache Misses...308 (0%) 103 | Total L1 D-Cache Misses...58,549 (0%) 104 | Total LL D-Cache Misses...8,451 (0%) 105 | 106 | Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw 107 | 0.40 0.18 0.21 0.35 0.93 1.00 0.38 0.00 0.00 dgemm_kernel.rs:matrixmultiply::gemm::masked_kernel 108 | ----------------------------------------------------------------------- 109 | 0.08 0.04 0.05 0.12 0.00 0.00 0.02 0.00 0.00 loopmacros.rs:matrixmultiply::gemm::masked_kernel 110 | ----------------------------------------------------------------------- 111 | 0.12 0.02 0.02 0.10 0.00 0.00 0.15 0.00 0.00 iter.rs:_std..ops..RangeAasstd..iter..Iterator::next 112 | ----------------------------------------------------------------------- 113 | 0.05 0.01 0.01 0.07 0.00 0.00 0.08 0.00 0.00 mem.rs:core::mem::swap 114 | ----------------------------------------------------------------------- 115 | 0.03 0.00 0.00 0.05 0.00 0.00 0.00 0.00 0.00 ???:core::cmp::impls::_implcmp..PartialOrdforusize::lt 116 | ----------------------------------------------------------------------- 117 | 0.03 0.01 0.01 0.04 0.00 0.00 0.03 0.00 0.00 ops.rs:_busizeasops..Addausize::add 118 | ----------------------------------------------------------------------- 119 | 0.04 0.01 0.01 0.04 0.00 0.00 0.03 0.00 0.00 ptr.rs:core::ptr::_implconstT::offset 120 | ----------------------------------------------------------------------- 121 | 0.02 0.01 0.00 0.03 0.00 0.00 0.01 0.00 0.00 ???:_usizeasops..Add::add 122 | ----------------------------------------------------------------------- 123 | 0.01 0.01 0.01 0.02 0.00 0.00 0.01 0.00 0.00 mem.rs:core::mem::uninitialized 124 | ----------------------------------------------------------------------- 125 | 0.02 0.01 0.01 0.02 0.00 0.00 0.04 0.00 0.00 wrapping.rs:_XorShiftRngasRng::next_u32 126 | ----------------------------------------------------------------------- 127 | 128 | 129 | ``` 130 | 131 
| ## What are the cachegrind metrics? 132 | 133 | * Ir -> Total Instructions 134 | * I1mr -> Level 1 I-Cache misses 135 | * ILmr -> Last Level I-Cache misses 136 | * Dr -> Total Memory Reads 137 | * D1mr -> Level 1 D-Cache read misses 138 | * DLmr -> Last Level D-cache read misses 139 | * Dw -> Total Memory Writes 140 | * D1mw -> Level 1 D-Cache write misses 141 | * DLmw -> Last Level D-cache write misses 142 | 143 | ## TODO 144 | 145 | * cmp subcommand - compare binary profiles 146 | * profiler macros 147 | * better context around expensive functions 148 | * support for more profiling tools 149 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | pub mod argparse; 2 | pub mod cargo; 3 | pub mod display; 4 | pub mod err; 5 | pub mod parse; 6 | pub mod profiler; 7 | 8 | use crate::argparse::{get_binary, get_num, get_profiler, get_sort_metric}; 9 | use crate::cargo::build_binary; 10 | use crate::err::ProfError; 11 | use crate::parse::cachegrind::CacheGrindParser; 12 | use crate::parse::callgrind::CallGrindParser; 13 | use crate::profiler::Profiler; 14 | use clap::{App, AppSettings, Arg, SubCommand}; 15 | use std::ffi::OsStr; 16 | use std::process; 17 | use std::process::Command; 18 | 19 | // macro to try something, but print custom error message and exit upon error. 20 | macro_rules! 
try_or_exit { 21 | ($e:expr) => { 22 | match $e { 23 | Ok(e) => e, 24 | Err(e) => { 25 | println!("{}", e); 26 | process::exit(1); 27 | } 28 | } 29 | }; 30 | } 31 | 32 | fn main() { 33 | let _ = real_main(); 34 | } 35 | 36 | // #[cfg(all(unix, any(target_os = "linux", target_os = "macos")))] 37 | #[cfg(unix)] 38 | fn real_main() -> Result<(), ProfError> { 39 | // create binary path argument 40 | let binary_arg = Arg::with_name("binary") 41 | .long("bin") 42 | .value_name("BINARY") 43 | .required(false) 44 | .help("binary you want to profile"); 45 | 46 | // create binary arguments positional args (aka, everything after a '--') 47 | let binargs_arg = Arg::with_name("binargs") 48 | .multiple(true) 49 | .value_name("BIN_ARGS") 50 | .required(false) 51 | .help("arguments to the binary when executed"); 52 | 53 | // create release argument 54 | let release = Arg::with_name("release") 55 | .long("release") 56 | .required(false) 57 | .help("whether binary should be built in release mode"); 58 | 59 | // create function count argument 60 | let fn_count_arg = Arg::with_name("n") 61 | .short("n") 62 | .value_name("NUMBER") 63 | .takes_value(true) 64 | .help("number of functions you want"); 65 | 66 | // create sort metric argument 67 | let sort_arg = Arg::with_name("sort") 68 | .long("sort") 69 | .value_name("SORT") 70 | .takes_value(true) 71 | .help("metric you want to sort by"); 72 | 73 | // keep output files 74 | let keep_arg = Arg::with_name("keep") 75 | .long("keep") 76 | .required(false) 77 | .help("keep profiler output files"); 78 | 79 | 80 | // create callgrind subcommand 81 | let callgrind = SubCommand::with_name("callgrind") 82 | .about("gets callgrind features") 83 | .version("1.0") 84 | .author("Suchin Gururangan") 85 | .arg(release.clone()) 86 | .arg(binary_arg.clone()) 87 | .arg(binargs_arg.clone()) 88 | .arg(fn_count_arg.clone()) 89 | .arg(keep_arg.clone()); 90 | 91 | // create cachegrind subcommand 92 | let cachegrind = SubCommand::with_name("cachegrind") 93 | 
.about("gets cachegrind features") 94 | .version("1.0") 95 | .author("Suchin Gururangan") 96 | .arg(release) 97 | .arg(binary_arg) 98 | .arg(binargs_arg.clone()) 99 | .arg(fn_count_arg) 100 | .arg(sort_arg) 101 | .arg(keep_arg); 102 | 103 | // create profiler subcommand 104 | let profiler = SubCommand::with_name("profiler") 105 | .about("gets callgrind features") 106 | .version("1.0") 107 | .author("Suchin Gururangan") 108 | .subcommand(callgrind) 109 | .subcommand(cachegrind); 110 | 111 | // create profiler application 112 | let matches = App::new("cargo-profiler") 113 | .bin_name("cargo") 114 | .settings(&[AppSettings::SubcommandRequired]) 115 | .version("1.0") 116 | .author("Suchin Gururangan") 117 | .about("Profile your binaries") 118 | .subcommand(profiler) 119 | .get_matches(); 120 | 121 | // parse arguments from cli call 122 | let (m, profiler) = try_or_exit!(get_profiler(&matches)); 123 | let binary = { 124 | if m.is_present("binary") { 125 | try_or_exit!(get_binary(&m)).to_string() 126 | } else if m.is_present("release") { 127 | try_or_exit!(build_binary(true)) 128 | } else { 129 | try_or_exit!(build_binary(false)) 130 | } 131 | }; 132 | 133 | let binary_name = binary.split('/').collect::>().pop().unwrap_or(""); 134 | let binargs: Vec<&OsStr> = match m.values_of_os("binargs") { 135 | None => vec![], 136 | Some(raw) => raw.collect(), 137 | }; 138 | 139 | let num = try_or_exit!(get_num(&m)); 140 | let sort_metric = try_or_exit!(get_sort_metric(&m)); 141 | 142 | match profiler { 143 | Profiler::CallGrind { .. } => println!( 144 | "\n\x1b[1;33mProfiling \x1b[1;0m{} \x1b[0mwith callgrind\x1b[0m...", 145 | binary_name 146 | ), 147 | Profiler::CacheGrind { .. } => println!( 148 | "\n\x1b[1;33mProfiling \x1b[1;0m{} \x1b[0mwith cachegrind\x1b[0m...", 149 | binary_name 150 | ), 151 | }; 152 | 153 | // get the profiler output 154 | let output = match profiler { 155 | Profiler::CallGrind { .. 
// pretty-print separator of functions
static DASHES: &str =
    "-----------------------------------------------------------------------";

/// Formats a number with thousands separators.
///
/// `n` is first converted to `usize` with `as`, so the fractional part is dropped and
/// NaN or negative values become `0`. The resulting digits are grouped in threes from
/// the right, with `sep` between groups, for values of any magnitude.
///
/// For example, `fmt_thousands_sep(198466456.0, ',')` yields `"198,466,456"`.
fn fmt_thousands_sep(n: f64, sep: char) -> String {
    let digits = (n as usize).to_string();
    let len = digits.len();
    let mut output = String::with_capacity(len + len / 3);

    for (i, digit) in digits.chars().enumerate() {
        // a separator goes before every digit that starts a full group of three,
        // except at the very beginning
        if i != 0 && (len - i) % 3 == 0 {
            output.push(sep);
        }
        output.push(digit);
    }

    output
}
40 | impl fmt::Display for Profiler { 41 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 42 | match *self { 43 | Profiler::CacheGrind { 44 | ref ir, 45 | ref i1mr, 46 | ref ilmr, 47 | ref dr, 48 | ref d1mr, 49 | ref dlmr, 50 | ref dw, 51 | ref d1mw, 52 | ref dlmw, 53 | ref data, 54 | ref functs, 55 | } => { 56 | let _ = write!( 57 | f, 58 | "\n\x1b[32mTotal Memory Accesses\x1b[0m...{}\t\x1b[0m\n\ 59 | \n\x1b[32mTotal L1 I-Cache Misses\x1b[0m...{} ({}%)\t\x1b[0m\ 60 | \n\x1b[32mTotal LL I-Cache Misses\x1b[0m...{} ({}%)\t\x1b[0m\ 61 | \n\x1b[32mTotal L1 D-Cache Misses\x1b[0m...{} ({}%)\t\x1b[0m\ 62 | \n\x1b[32mTotal LL D-Cache Misses\x1b[0m...{} ({}%)\t\x1b[0m\n\n\ 63 | 64 | ", 65 | fmt_thousands_sep(ir + dr + dw, ','), 66 | fmt_thousands_sep(*i1mr, ','), 67 | fmt_thousands_sep(i1mr / (ir + dr + dw) * 100., ','), 68 | fmt_thousands_sep(*ilmr, ','), 69 | fmt_thousands_sep(ilmr / (ir + dr + dw) * 100., ','), 70 | fmt_thousands_sep(d1mr + d1mw, ','), 71 | fmt_thousands_sep((d1mr + d1mw) / (ir + dr + dw) * 100., ','), 72 | fmt_thousands_sep(dlmr + dlmw, ','), 73 | fmt_thousands_sep((dlmr + dlmw) / (ir + dr + dw) * 100., ','), 74 | ); 75 | let _ = writeln!( 76 | f, 77 | " \x1b[1;36mIr \x1b[1;36mI1mr \x1b[1;36mILmr \x1b[1;36mDr \ 78 | \x1b[1;36mD1mr \x1b[1;36mDLmr \x1b[1;36mDw \x1b[1;36mD1mw \ 79 | \x1b[1;36mDLmw" 80 | ); 81 | 82 | for (ref x, y) in data.axis_iter(Axis(0)).zip(functs.iter()) { 83 | let _ = writeln!( 84 | f, 85 | "\x1b[0m{:.2} {:.2} {:.2} {:.2} {:.2} {:.2} {:.2} {:.2} {:.2} \ 86 | {}", 87 | x[0] / ir, 88 | x[1] / i1mr, 89 | x[2] / ilmr, 90 | x[3] / dr, 91 | x[4] / d1mr, 92 | x[5] / dlmr, 93 | x[6] / dw, 94 | x[7] / d1mw, 95 | x[8] / dlmw, 96 | y 97 | ); 98 | println!("{}", DASHES); 99 | } 100 | Ok(()) 101 | } 102 | 103 | Profiler::CallGrind { 104 | ref total_instructions, 105 | ref instructions, 106 | ref functs, 107 | } => { 108 | let _ = write!( 109 | f, 110 | "\n\x1b[32mTotal Instructions\x1b[0m...{}\n\n\x1b[0m", 111 | 
fmt_thousands_sep(*total_instructions, ',') 112 | ); 113 | 114 | for (&x, ref y) in instructions.iter().zip(functs.iter()) { 115 | { 116 | let perc = x / total_instructions * 100.; 117 | match perc { 118 | t if t >= 50.0 => { 119 | let _ = writeln!( 120 | f, 121 | "{} (\x1b[31m{:.1}%\x1b[0m)\x1b[0m {}", 122 | fmt_thousands_sep(x, ','), 123 | t, 124 | y 125 | ); 126 | println!("{}", DASHES); 127 | } 128 | t if (t >= 30.0) & (t < 50.0) => { 129 | let _ = writeln!( 130 | f, 131 | "{} (\x1b[33m{:.1}%\x1b[0m)\x1b[0m {}", 132 | fmt_thousands_sep(x, ','), 133 | t, 134 | y 135 | ); 136 | println!("{}", DASHES); 137 | } 138 | _ => { 139 | let _ = writeln!( 140 | f, 141 | "{} (\x1b[32m{:.1}%\x1b[0m)\x1b[0m {}", 142 | fmt_thousands_sep(x, ','), 143 | x / total_instructions * 100., 144 | y 145 | ); 146 | println!("{}", DASHES); 147 | } 148 | } 149 | } 150 | } 151 | Ok(()) 152 | } 153 | } 154 | } 155 | } 156 | 157 | #[cfg(test)] 158 | mod test { 159 | #[test] 160 | fn test_fmt_thousands_sep() { 161 | assert_eq!(1, 1); 162 | } 163 | 164 | } 165 | -------------------------------------------------------------------------------- /src/err.rs: -------------------------------------------------------------------------------- 1 | use std::error; 2 | use std::fmt; 3 | use std::io::Error as ioError; 4 | 5 | #[derive(Debug)] 6 | /// Represents potential errors that may occur when profiling 7 | pub enum ProfError { 8 | RegexError, 9 | InvalidProfiler, 10 | InvalidBinary, 11 | InvalidNum, 12 | InvalidSortMetric, 13 | /// Wraps a std::io::Error 14 | IOError(ioError), 15 | UTF8Error, 16 | MisalignedData, 17 | CompilationError(String, String), 18 | TomlError, 19 | ReadManifestError, 20 | NoNameError, 21 | NoTargetDirectory, 22 | OutOfMemoryError, 23 | CliError, 24 | } 25 | 26 | impl fmt::Display for ProfError { 27 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 28 | match *self { 29 | ProfError::RegexError => write!( 30 | f, 31 | "\x1b[1;31merror: \x1b[0mRegex error -- please file a 
bug. In bug report, \ 32 | please include the original output file from profiler, e.g. from \ 33 | valgrind --tool=cachegrind --cachegrind-out-file=cachegrind.txt" 34 | ), 35 | ProfError::InvalidProfiler => write!( 36 | f, 37 | "\x1b[1;31merror: \x1b[0mInvalid profiler. cargo profiler currently \ 38 | supports callgrind and cachegrind." 39 | ), 40 | ProfError::InvalidBinary => write!( 41 | f, 42 | "\x1b[1;31merror: \x1b[0mInvalid binary. make sure binary exists." 43 | ), 44 | ProfError::InvalidNum => write!( 45 | f, 46 | "\x1b[1;31merror: \x1b[0mInvalid number. make sure number is a positive \ 47 | integer." 48 | ), 49 | ProfError::InvalidSortMetric => write!( 50 | f, 51 | "\x1b[1;31merror: \x1b[0mInvalid metric to sort on. available cachegrind \ 52 | metrics are \nir, i1mr, ilmr, dr, d1mr, dlmr, dw, d1mw, and dlmw. Check \ 53 | README for details on these metrics." 54 | ), 55 | ProfError::IOError(ref err) => write!( 56 | f, 57 | "\x1b[1;31merror: \x1b[0mIO error: {} -- please file a bug.", 58 | err 59 | ), 60 | ProfError::UTF8Error => write!( 61 | f, 62 | "\x1b[1;31merror: \x1b[0mCLI Utf8 error -- please file a bug." 63 | ), 64 | ProfError::MisalignedData => write!( 65 | f, 66 | "\x1b[1;31merror: \x1b[0mMisaligned data arrays due to regex error -- \ 67 | please file a bug." 68 | ), 69 | ProfError::CompilationError(ref package_name, ref stderr) => write!( 70 | f, 71 | "\x1b[1;31merror: \x1b[0mFailed to compile {}.\n\n{}", 72 | package_name, stderr 73 | ), 74 | ProfError::TomlError => write!( 75 | f, 76 | "\x1b[1;31merror: \x1b[0mError in parsing Cargo.toml to derive package \ 77 | name. Make sure package name is directly under [package] tag." 78 | ), 79 | ProfError::ReadManifestError => write!( 80 | f, 81 | "\x1b[1;31merror: \x1b[0mCargo.toml missing. Are you sure you're in a Rust \ 82 | project?" 83 | ), 84 | 85 | ProfError::NoNameError => write!( 86 | f, 87 | "\x1b[1;31merror: \x1b[0mNo package name found in Cargo.toml. 
Run \ 88 | cargo read-manifest to make sure everything looks okay. Otherwise please \ 89 | submit bug." 90 | ), 91 | 92 | ProfError::NoTargetDirectory => write!( 93 | f, 94 | "\x1b[1;31merror: \x1b[0mNo target output directory found in project. \ 95 | Binary must be in target/debug/ or target/release/, or specify binary \ 96 | path explicitly with --bin argument." 97 | ), 98 | ProfError::OutOfMemoryError => write!( 99 | f, 100 | "\x1b[1;31merror: \x1b[0mValgrind's memory management: out of memory. \ 101 | Valgrind cannot continue. Sorry. " 102 | ), 103 | ProfError::CliError => write!( 104 | f, 105 | "\x1b[1;31merror: \x1b[0mError in valgrind cli call. Make sure valgrind is \ 106 | installed properly." 107 | ), 108 | } 109 | } 110 | } 111 | 112 | impl error::Error for ProfError { 113 | fn description(&self) -> &str { 114 | match *self { 115 | ProfError::RegexError => "Regex error. file bug.", 116 | ProfError::InvalidProfiler => "Invalid Profiler.", 117 | ProfError::InvalidBinary => "Invalid Binary.", 118 | ProfError::InvalidNum => "Invalid number.", 119 | ProfError::InvalidSortMetric => "Invalid sort metric.", 120 | ProfError::MisalignedData => "Misaligned Data. File bug.", 121 | ProfError::CompilationError(_, _) => { 122 | "Failed to compile. Run cargo build to get compilation error." 123 | } 124 | ProfError::TomlError => "Error in parsing Cargo.toml.", 125 | ProfError::ReadManifestError => "Error in reading the manifest of this crate.", 126 | ProfError::NoNameError => "No package name found in Cargo.toml", 127 | ProfError::NoTargetDirectory => "No target output directory found in project.", 128 | ProfError::IOError(ref err) => err.description(), 129 | ProfError::OutOfMemoryError => "out of memory.", 130 | ProfError::CliError => "make sure valgrind is installed properly.", 131 | ProfError::UTF8Error => "utf8 error. 
file bug.", 132 | } 133 | } 134 | 135 | fn cause(&self) -> Option<&dyn error::Error> { 136 | match *self { 137 | ProfError::RegexError => None, 138 | ProfError::InvalidProfiler => None, 139 | ProfError::InvalidBinary => None, 140 | ProfError::InvalidNum => None, 141 | ProfError::InvalidSortMetric => None, 142 | ProfError::MisalignedData => None, 143 | ProfError::TomlError => None, 144 | ProfError::IOError(ref err) => Some(err), 145 | ProfError::CompilationError(_, _) => None, 146 | ProfError::ReadManifestError => None, 147 | ProfError::NoNameError => None, 148 | ProfError::NoTargetDirectory => None, 149 | ProfError::OutOfMemoryError => None, 150 | ProfError::CliError => None, 151 | ProfError::UTF8Error => None, 152 | } 153 | } 154 | } 155 | 156 | impl From for ProfError { 157 | fn from(err: ioError) -> ProfError { 158 | ProfError::IOError(err) 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "aho-corasick" 5 | version = "0.7.6" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" 8 | dependencies = [ 9 | "memchr", 10 | ] 11 | 12 | [[package]] 13 | name = "ansi_term" 14 | version = "0.11.0" 15 | source = "registry+https://github.com/rust-lang/crates.io-index" 16 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 17 | dependencies = [ 18 | "winapi", 19 | ] 20 | 21 | [[package]] 22 | name = "atty" 23 | version = "0.2.13" 24 | source = "registry+https://github.com/rust-lang/crates.io-index" 25 | checksum = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" 26 | dependencies = [ 27 | "libc", 28 | "winapi", 29 | ] 30 | 31 | [[package]] 32 | name = "autocfg" 33 | version = "0.1.7" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" 36 | 37 | [[package]] 38 | name = "bitflags" 39 | version = "1.2.1" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 42 | 43 | [[package]] 44 | name = "cargo-profiler" 45 | version = "0.2.0" 46 | dependencies = [ 47 | "clap", 48 | "lazy_static", 49 | "ndarray", 50 | "regex", 51 | "serde", 52 | "serde_json", 53 | ] 54 | 55 | [[package]] 56 | name = "clap" 57 | version = "2.33.0" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" 60 | dependencies = [ 61 | "ansi_term", 62 | "atty", 63 | "bitflags", 64 | "strsim", 65 | "textwrap", 66 | "unicode-width", 67 | "vec_map", 68 | ] 69 | 70 | [[package]] 71 | name = "itoa" 72 | version = "1.0.1" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = 
"1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" 75 | 76 | [[package]] 77 | name = "lazy_static" 78 | version = "1.4.0" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 81 | 82 | [[package]] 83 | name = "libc" 84 | version = "0.2.65" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8" 87 | 88 | [[package]] 89 | name = "matrixmultiply" 90 | version = "0.2.3" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "d4f7ec66360130972f34830bfad9ef05c6610a43938a467bcc9ab9369ab3478f" 93 | dependencies = [ 94 | "rawpointer", 95 | ] 96 | 97 | [[package]] 98 | name = "memchr" 99 | version = "2.2.1" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" 102 | 103 | [[package]] 104 | name = "ndarray" 105 | version = "0.13.1" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "ac06db03ec2f46ee0ecdca1a1c34a99c0d188a0d83439b84bf0cb4b386e4ab09" 108 | dependencies = [ 109 | "matrixmultiply", 110 | "num-complex", 111 | "num-integer", 112 | "num-traits", 113 | "rawpointer", 114 | ] 115 | 116 | [[package]] 117 | name = "num-complex" 118 | version = "0.2.3" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "fcb0cf31fb3ff77e6d2a6ebd6800df7fdcd106f2ad89113c9130bcd07f93dffc" 121 | dependencies = [ 122 | "autocfg", 123 | "num-traits", 124 | ] 125 | 126 | [[package]] 127 | name = "num-integer" 128 | version = "0.1.41" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | checksum = "b85e541ef8255f6cf42bbfe4ef361305c6c135d10919ecc26126c4e5ae94bc09" 131 | dependencies = [ 132 | "autocfg", 133 | "num-traits", 134 | ] 135 | 136 | [[package]] 137 | 
name = "num-traits" 138 | version = "0.2.8" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" 141 | dependencies = [ 142 | "autocfg", 143 | ] 144 | 145 | [[package]] 146 | name = "proc-macro2" 147 | version = "1.0.27" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" 150 | dependencies = [ 151 | "unicode-xid", 152 | ] 153 | 154 | [[package]] 155 | name = "quote" 156 | version = "1.0.2" 157 | source = "registry+https://github.com/rust-lang/crates.io-index" 158 | checksum = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" 159 | dependencies = [ 160 | "proc-macro2", 161 | ] 162 | 163 | [[package]] 164 | name = "rawpointer" 165 | version = "0.2.1" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" 168 | 169 | [[package]] 170 | name = "regex" 171 | version = "1.4.2" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" 174 | dependencies = [ 175 | "aho-corasick", 176 | "memchr", 177 | "regex-syntax", 178 | "thread_local", 179 | ] 180 | 181 | [[package]] 182 | name = "regex-syntax" 183 | version = "0.6.25" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" 186 | 187 | [[package]] 188 | name = "ryu" 189 | version = "1.0.2" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" 192 | 193 | [[package]] 194 | name = "serde" 195 | version = "1.0.131" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = 
"b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1" 198 | dependencies = [ 199 | "serde_derive", 200 | ] 201 | 202 | [[package]] 203 | name = "serde_derive" 204 | version = "1.0.131" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "b710a83c4e0dff6a3d511946b95274ad9ca9e5d3ae497b63fda866ac955358d2" 207 | dependencies = [ 208 | "proc-macro2", 209 | "quote", 210 | "syn", 211 | ] 212 | 213 | [[package]] 214 | name = "serde_json" 215 | version = "1.0.75" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "c059c05b48c5c0067d4b4b2b4f0732dd65feb52daf7e0ea09cd87e7dadc1af79" 218 | dependencies = [ 219 | "itoa", 220 | "ryu", 221 | "serde", 222 | ] 223 | 224 | [[package]] 225 | name = "strsim" 226 | version = "0.8.0" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 229 | 230 | [[package]] 231 | name = "syn" 232 | version = "1.0.73" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" 235 | dependencies = [ 236 | "proc-macro2", 237 | "quote", 238 | "unicode-xid", 239 | ] 240 | 241 | [[package]] 242 | name = "textwrap" 243 | version = "0.11.0" 244 | source = "registry+https://github.com/rust-lang/crates.io-index" 245 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 246 | dependencies = [ 247 | "unicode-width", 248 | ] 249 | 250 | [[package]] 251 | name = "thread_local" 252 | version = "1.0.1" 253 | source = "registry+https://github.com/rust-lang/crates.io-index" 254 | checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" 255 | dependencies = [ 256 | "lazy_static", 257 | ] 258 | 259 | [[package]] 260 | name = "unicode-width" 261 | version = "0.1.6" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum 
/// Cachegrind metrics that the per-function table can be sorted on.
///
/// The first nine variants correspond, in order, to columns 0 through 8 of
/// the parsed data matrix. The event names are the ones Cachegrind uses in
/// its reports.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Metric {
    /// Instructions executed (column 0).
    Ir,
    /// L1 instruction-cache read misses (column 1).
    I1mr,
    /// Last-level cache instruction read misses (column 2).
    ILmr,
    /// Data reads (column 3).
    Dr,
    /// L1 data-cache read misses (column 4).
    D1mr,
    /// Last-level cache data read misses (column 5).
    DLmr,
    /// Data writes (column 6).
    Dw,
    /// L1 data-cache write misses (column 7).
    D1mw,
    /// Last-level cache data write misses (column 8).
    DLmw,
    /// Fallback variant: treated like `Ir` (column 0) when the sort column is
    /// chosen. Presumably produced when no sort metric was given -- confirm
    /// against the argument parser.
    NAN,
}
Utility function for sorting a matrix. used to sort cachegrind data by particular metric (descending) 25 | pub fn sort_matrix(mat: &Array2, sort_col: ArrayView1) -> (Array2, Vec) { 26 | let mut enum_col = sort_col.iter().enumerate().collect::>(); 27 | enum_col.sort_by(|a, &b| a.1.partial_cmp(b.1).unwrap_or(Less).reverse()); 28 | let indices = enum_col.iter().map(|x| x.0).collect::>(); 29 | (mat.select(Axis(0), indices.as_slice()), indices) 30 | } 31 | 32 | /// Parser trait. To parse the output of Profilers, we first have to get their output from 33 | /// the command line, and then parse the output into respective structs. 34 | pub trait CacheGrindParser { 35 | fn cachegrind_cli(&self, binary: &str, binargs: &[&OsStr]) -> Result; 36 | fn cachegrind_parse<'b>( 37 | &'b self, 38 | output: &'b str, 39 | num: usize, 40 | sort_metric: Metric, 41 | ) -> Result; 42 | } 43 | 44 | impl CacheGrindParser for Profiler { 45 | /// Get profiler output from stdout. 46 | fn cachegrind_cli(&self, binary: &str, binargs: &[&OsStr]) -> Result { 47 | // get cachegrind cli output from stdout 48 | let _ = Command::new("valgrind") 49 | .arg("--tool=cachegrind") 50 | .arg("--cachegrind-out-file=cachegrind.out") 51 | .arg(binary) 52 | .args(binargs) 53 | .output() 54 | .or(Err(ProfError::CliError)); 55 | 56 | let cachegrind_output = Command::new("cg_annotate") 57 | .arg("cachegrind.out") 58 | .arg(binary) 59 | .output() 60 | .or(Err(ProfError::CliError)); 61 | 62 | cachegrind_output 63 | .and_then(|x| String::from_utf8(x.stdout).or(Err(ProfError::UTF8Error))) 64 | .or(Err(ProfError::CliError)) 65 | } 66 | // Get parse the profiler output into respective structs. 
67 | fn cachegrind_parse<'b>( 68 | &'b self, 69 | output: &'b str, 70 | num: usize, 71 | sort_metric: Metric, 72 | ) -> Result { 73 | // split output line-by-line 74 | let mut out_split: Vec<&'b str> = output.split('\n').collect(); 75 | 76 | // regex identifies lines that start with digits and have characters that commonly 77 | // show up in file paths 78 | lazy_static! { 79 | static ref CACHEGRIND_REGEX: Regex = 80 | Regex::new(r"\d+\s*[a-zA-Z]*$*_*:*/+\.*@*-*|\d+\s*[a-zA-Z]*$*_*\?+:*/*\.*-*@*-*") 81 | .unwrap(); 82 | static ref COMPILER_TRASH: Regex = Regex::new(r"\$\w{2}\$|\$\w{3}\$").unwrap(); 83 | static ref ERROR_REGEX: Regex = 84 | Regex::new(r"Valgrind's memory management: out of memory").unwrap(); 85 | } 86 | 87 | let errs = out_split 88 | .to_owned() 89 | .into_iter() 90 | .filter(|x| ERROR_REGEX.is_match(x)) 91 | .collect::>(); 92 | 93 | if !errs.is_empty() { 94 | return Err(ProfError::OutOfMemoryError); 95 | } 96 | 97 | out_split.retain(|x| CACHEGRIND_REGEX.is_match(x)); 98 | 99 | let mut funcs: Vec = Vec::new(); 100 | let mut data_vec: Vec> = Vec::new(); 101 | 102 | // loop through each line and get numbers + func 103 | for sample in out_split.iter() { 104 | // trim the sample, split by whitespace to separate out each data point 105 | // (numbers + func) 106 | let mut elems = sample.trim().split(' ').collect::>(); 107 | // remove any empty strings 108 | elems.retain(|x| x != &""); 109 | 110 | // for each number, remove any commas and parse into f64. the last element in 111 | // data_elems is the function file path. 112 | let mut numbers = Vec::new(); 113 | 114 | for elem in elems[0..elems.len() - 1].iter() { 115 | let number = match elem.trim().replace(",", "").parse::() { 116 | Ok(n) => n, 117 | Err(_) => return Err(ProfError::RegexError), 118 | }; 119 | 120 | numbers.push(number); 121 | } 122 | 123 | // reshape the vector of parsed numbers into a 1 x 9 matrix, and push the 124 | // matrix to our vector of 1 x 9 matrices. 
125 | if let Ok(data_col) = Array::from_shape_vec((numbers.len(), 1), numbers) { 126 | data_vec.push(data_col); 127 | } 128 | // the last element in data_elems is the function file path. 129 | // get the file in the file-path (which includes the function) and push that to 130 | // the funcs vector. 131 | let path = elems[elems.len() - 1].split('/').collect::>(); 132 | let func = path[path.len() - 1]; 133 | 134 | let mut func = COMPILER_TRASH.replace_all(func, ""); 135 | let idx = func.rfind("::").unwrap_or_else(|| func.len()); 136 | func.to_mut().drain(idx..).collect::(); 137 | funcs.push(func.into_owned()); 138 | } 139 | 140 | // stack all the 1 x 9 matrices in data to a n x 9 matrix. 141 | let data_matrix = match stack( 142 | Axis(1), 143 | &data_vec 144 | .iter() 145 | .map(|x| x.view()) 146 | .collect::>() 147 | .as_slice(), 148 | ) { 149 | Ok(m) => m.t().to_owned(), 150 | Err(_) => return Err(ProfError::MisalignedData), 151 | }; 152 | 153 | // match the sort argument to a column of the matrix that we will sort on. 154 | // default sorting -> first column (total instructions). 155 | let sort_col = match sort_metric { 156 | Metric::Ir => data_matrix.column(0), 157 | Metric::I1mr => data_matrix.column(1), 158 | Metric::ILmr => data_matrix.column(2), 159 | Metric::Dr => data_matrix.column(3), 160 | Metric::D1mr => data_matrix.column(4), 161 | Metric::DLmr => data_matrix.column(5), 162 | Metric::Dw => data_matrix.column(6), 163 | Metric::D1mw => data_matrix.column(7), 164 | Metric::DLmw => data_matrix.column(8), 165 | Metric::NAN => data_matrix.column(0), 166 | }; 167 | 168 | // sort the matrix of data and functions by a particular column. 169 | // to sort matrix, we keep track of sorted indices, and select the matrix wrt 170 | // these sorted indices. to sort functions, we index the funcs vector with the 171 | // sorted indices. 
172 | let (mut sorted_data_matrix, indices) = sort_matrix(&data_matrix, sort_col); 173 | 174 | let mut sorted_funcs: Vec = indices 175 | .iter() 176 | .map(|&x| (&funcs[x]).to_owned()) 177 | .collect::>(); 178 | 179 | // sum the columns of the data matrix to get total metrics. 180 | let ir = sorted_data_matrix.column(0).scalar_sum(); 181 | let i1mr = sorted_data_matrix.column(1).scalar_sum(); 182 | let ilmr = sorted_data_matrix.column(2).scalar_sum(); 183 | let dr = sorted_data_matrix.column(3).scalar_sum(); 184 | let d1mr = sorted_data_matrix.column(4).scalar_sum(); 185 | let dlmr = sorted_data_matrix.column(5).scalar_sum(); 186 | let dw = sorted_data_matrix.column(6).scalar_sum(); 187 | let d1mw = sorted_data_matrix.column(7).scalar_sum(); 188 | let dlmw = sorted_data_matrix.column(8).scalar_sum(); 189 | 190 | // parse the limit argument n, and take the first n values of data matrix/funcs 191 | // vector accordingly. 192 | if num < sorted_data_matrix.nrows() { 193 | let ls = (0..num).collect::>(); 194 | sorted_data_matrix = sorted_data_matrix.select(Axis(0), ls.as_slice()); 195 | sorted_funcs = sorted_funcs.iter().take(num).cloned().collect(); 196 | } 197 | 198 | // put all data in cachegrind struct! 199 | Ok(Profiler::CacheGrind { 200 | ir, 201 | i1mr, 202 | ilmr, 203 | dr, 204 | d1mr, 205 | dlmr, 206 | dw, 207 | d1mw, 208 | dlmw, 209 | data: sorted_data_matrix, 210 | functs: sorted_funcs, 211 | }) 212 | } 213 | } 214 | 215 | #[cfg(test)] 216 | mod test { 217 | #[test] 218 | fn test_cachegrind_parse_1() { 219 | assert_eq!(1, 1); 220 | } 221 | 222 | #[test] 223 | fn test_cachegrind_parse_2() { 224 | assert_eq!(1, 1); 225 | assert_eq!(1, 1); 226 | } 227 | 228 | #[test] 229 | fn test_cachegrind_parse_3() { 230 | assert_eq!(1, 1); 231 | } 232 | } 233 | --------------------------------------------------------------------------------