├── .github ├── dependabot.yml └── workflows │ └── tests.yaml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── buf.gen.yaml ├── buf.work.yaml ├── capi ├── Cargo.toml ├── include │ └── jemalloc_pprof.h ├── man │ └── dump_jemalloc_pprof.3 └── src │ └── lib.rs ├── example ├── .gitignore ├── Cargo.toml └── src │ └── main.rs ├── mappings ├── Cargo.toml └── src │ └── lib.rs ├── proto └── google │ └── pprof │ └── profile.proto ├── shell.nix ├── src └── lib.rs └── util ├── Cargo.toml └── src ├── cast.rs ├── lib.rs └── perftools.profiles.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 10 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | open-pull-requests-limit: 10 13 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: test suite 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | test: 6 | name: cargo test 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@stable 11 | - run: | 12 | sudo apt update 13 | sudo apt install -y build-essential libjemalloc-dev 14 | - run: | 15 | make test 16 | make capi 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["mappings", "capi", "util", "example"] 3 | 4 | [package] 5 | name = "jemalloc_pprof" 6 | 
description = "Convert jemalloc heap profiles to pprof to understand memory usage, fix memory leaks, and fix OOM Kills." 7 | version = "0.7.0" 8 | edition = "2021" 9 | publish = true 10 | license = "Apache-2.0" 11 | authors = [ 12 | "Frederic Branczyk ", 13 | "Brennan Vincent ", 14 | ] 15 | repository = "https://github.com/polarsignals/rust-jemalloc-pprof" 16 | keywords = ["jemalloc", "pprof", "memory", "profiling", "observability"] 17 | categories = [ 18 | "development-tools", 19 | "development-tools::profiling", 20 | "development-tools::debugging", 21 | "memory-management", 22 | ] 23 | documentation = "https://docs.rs/jemalloc_pprof/latest/jemalloc_pprof/" 24 | homepage = "https://crates.io/crates/jemalloc_pprof" 25 | 26 | [package.metadata.docs.rs] 27 | all-features = true 28 | 29 | [workspace.dependencies] 30 | anyhow = "1" 31 | flate2 = "1.0" 32 | libc = "0.2" 33 | once_cell = "1.19" 34 | prost = { version = "0.13", features = ["no-recursion-limit"] } 35 | tempfile = "3.11" 36 | tikv-jemalloc-ctl = { version = "0.6", features = ["use_std"] } 37 | tracing = "0.1" 38 | tokio = { version = "1", features = ["time", "sync"] } 39 | paste = "1.0" 40 | num = "0.4" 41 | errno = "0.3" 42 | util = { path = "./util", version = "0.7", package = "pprof_util" } 43 | mappings = { path = "./mappings", version = "0.7" } 44 | backtrace = "0.3" 45 | inferno = "0.12" 46 | 47 | [dependencies] 48 | util.workspace = true 49 | mappings.workspace = true 50 | libc.workspace = true 51 | anyhow.workspace = true 52 | tikv-jemalloc-ctl.workspace = true 53 | once_cell.workspace = true 54 | tracing.workspace = true 55 | tempfile.workspace = true 56 | tokio.workspace = true 57 | 58 | [features] 59 | flamegraph = ["util/flamegraph"] 60 | symbolize = ["util/symbolize"] 61 | 62 | [dev-dependencies] 63 | tikv-jemallocator = "0.6" 64 | axum = "0.7" 65 | # re-import tokio to enable all its features. 
This is required to 66 | # successfully compile the test snippets that are part of the documentation 67 | tokio = { version = "1", features = ["full"] } 68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2023 Polar Signals Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: capi 2 | capi: 3 | cargo build -p capi --release 4 | 5 | .PHONY: fmt 6 | fmt: 7 | cargo fmt --all -- --check 8 | 9 | # Run Clippy with default and all features, to ensure both variants compile. 
10 | .PHONY: lint 11 | lint: 12 | cargo clippy --workspace -- -D warnings 13 | cargo clippy --workspace --all-features -- -D warnings 14 | 15 | .PHONY: doc 16 | doc: 17 | RUSTDOCFLAGS="--cfg docsrs -D warnings" cargo doc --all-features --no-deps 18 | 19 | .PHONY: test 20 | test: fmt lint doc 21 | cargo test --workspace --all-features 22 | 23 | .PHONY: clean 24 | clean: 25 | cargo clean 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Discord](https://img.shields.io/discord/813669360513056790?label=Discord)](https://discord.gg/Qgh4c9tRCE) 2 | 3 | # rust-jemalloc-pprof 4 | 5 | A rust library to collect and convert Heap profiling data from the [jemalloc](https://jemalloc.net/) allocator and convert it to the [pprof](https://github.com/google/pprof/tree/main/proto) format. 6 | 7 | To understand how to use this together with Polar Signals Cloud to continuously collect profiling data, refer to the [Use with Polar Signals Cloud](#use-with-polar-signals-cloud) section. 8 | 9 | This code was originally developed as part of [Materialize](https://github.com/MaterializeInc/materialize), and then in a collaboration extracted into this standalone library. 10 | 11 | ## Requirements 12 | 13 | Currently, this library only supports Linux. 14 | 15 | Furthermore, you must be able to switch your allocator to `jemalloc`. 16 | If you need to continue using the default system allocator for any reason, 17 | this library will not be useful. 18 | 19 | ## Usage 20 | 21 | Internally this library uses [`tikv-jemalloc-ctl`](https://docs.rs/tikv-jemalloc-ctl/latest/tikv_jemalloc_ctl/) to interact with jemalloc, so to use it, you must use the jemalloc allocator via the [`tikv-jemallocator`](https://crates.io/crates/tikv-jemallocator) library. 22 | 23 | When adding `tikv-jemallocator` as a dependency, make sure to enable the `profiling` feature. 
24 | 25 | ```toml 26 | [dependencies] 27 | [target.'cfg(not(target_env = "msvc"))'.dependencies] 28 | tikv-jemallocator = { version = "0.6.0", features = ["profiling", "unprefixed_malloc_on_supported_platforms"] } 29 | ``` 30 | 31 | > Note: We also recommend enabling the `unprefixed_malloc_on_supported_platforms` feature. It is not strictly necessary, but it influences the rest of the usage. 32 | 33 | Then set the global allocator and configure it with profiling enabled. 34 | 35 | ```rust,no_run 36 | #[cfg(not(target_env = "msvc"))] 37 | #[global_allocator] 38 | static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; 39 | 40 | #[allow(non_upper_case_globals)] 41 | #[export_name = "malloc_conf"] 42 | pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0"; 43 | ``` 44 | 45 | > If you do not use the `unprefixed_malloc_on_supported_platforms` feature, you have to name it `_rjem_malloc_conf` instead of `malloc_conf`. 46 | 47 | 2^19 bytes (512KiB) is the default configuration for the sampling period, but we recommend being explicit. To understand more about jemalloc sampling check out the [detailed docs](https://github.com/jemalloc/jemalloc/blob/dev/doc_internal/PROFILING_INTERNALS.md#sampling) on it. 48 | 49 | We recommend serving the profiling data on an HTTP server such as [axum](https://github.com/tokio-rs/axum), that could look like this, and we'll intentionally include a 4mb allocation to trigger sampling. 
50 | 51 | ```rust,no_run 52 | #[tokio::main] 53 | async fn main() { 54 | let mut v = vec![]; 55 | for i in 0..1000000 { 56 | v.push(i); 57 | } 58 | 59 | let app = axum::Router::new() 60 | .route("/debug/pprof/heap", axum::routing::get(handle_get_heap)); 61 | 62 | // run our app with hyper, listening globally on port 3000 63 | let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap(); 64 | axum::serve(listener, app).await.unwrap(); 65 | } 66 | 67 | use axum::http::StatusCode; 68 | use axum::response::IntoResponse; 69 | 70 | pub async fn handle_get_heap() -> Result<impl IntoResponse, (StatusCode, String)> { 71 | let mut prof_ctl = jemalloc_pprof::PROF_CTL.as_ref().unwrap().lock().await; 72 | require_profiling_activated(&prof_ctl)?; 73 | let pprof = prof_ctl 74 | .dump_pprof() 75 | .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))?; 76 | Ok(pprof) 77 | } 78 | 79 | /// Checks whether jemalloc profiling is activated and returns an error response if not. 80 | fn require_profiling_activated(prof_ctl: &jemalloc_pprof::JemallocProfCtl) -> Result<(), (StatusCode, String)> { 81 | if prof_ctl.activated() { 82 | Ok(()) 83 | } else { 84 | Err((axum::http::StatusCode::FORBIDDEN, "heap profiling not activated".into())) 85 | } 86 | } 87 | ``` 88 | 89 | Then, with the application running, we can capture a profile and view it with the pprof toolchain. 90 | 91 | ```shell 92 | curl localhost:3000/debug/pprof/heap > heap.pb.gz 93 | pprof -http=:8080 heap.pb.gz 94 | ``` 95 | 96 | > Note: if symbolization is not enabled, either `addr2line` or `llvm-addr2line` needs to be available in the path and pprof needs to be able to discover the respective debuginfos. 
97 | 98 | To generate symbolized profiles, enable the `symbolize` crate feature: 99 | 100 | ```toml 101 | [dependencies] 102 | jemalloc_pprof = { version = "0.7", features = ["symbolize"] } 103 | ``` 104 | 105 | ### Flamegraph SVGs 106 | 107 | The `flamegraph` crate feature can also be enabled to generate interactive flamegraph SVGs directly 108 | (implies the `symbolize` feature): 109 | 110 | ```toml 111 | jemalloc_pprof = { version = "0.7", features = ["flamegraph"] } 112 | ``` 113 | 114 | We can then adjust the example above to also emit a flamegraph SVG: 115 | 116 | ```rust,ignore 117 | #[tokio::main] 118 | async fn main() { 119 | let app = axum::Router::new() 120 | .route("/debug/pprof/heap", axum::routing::get(handle_get_heap)) 121 | .route("/debug/pprof/heap/flamegraph", axum::routing::get(handle_get_heap_flamegraph)); 122 | // ... 123 | } 124 | 125 | pub async fn handle_get_heap_flamegraph() -> Result<impl IntoResponse, (StatusCode, String)> { 126 | use axum::body::Body; 127 | use axum::http::header::CONTENT_TYPE; 128 | use axum::response::Response; 129 | 130 | let mut prof_ctl = jemalloc_pprof::PROF_CTL.as_ref().unwrap().lock().await; 131 | require_profiling_activated(&prof_ctl)?; 132 | let svg = prof_ctl 133 | .dump_flamegraph() 134 | .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))?; 135 | Response::builder() 136 | .header(CONTENT_TYPE, "image/svg+xml") 137 | .body(Body::from(svg)) 138 | .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string())) 139 | } 140 | ``` 141 | 142 | ### Writeable temporary directory 143 | 144 | The way this library works is that it creates a new temporary file (in the [platform-specific default temp dir](https://docs.rs/tempfile/latest/tempfile/struct.NamedTempFile.html)), and instructs jemalloc to dump a profile into that file. Therefore the platform-specific temporary directory must be writeable by the process. After reading and converting it to pprof, the file is cleaned up via the destructor. 
A single profile tends to be only a few kilobytes large, so it doesn't require significant space, but it's non-zero and needs to be writeable. 145 | 146 | ## Use with Polar Signals Cloud 147 | 148 | Polar Signals Cloud allows continuously collecting heap profiling data, so you always have the right profiling data available, and don't need to search for the right data, you already have it! 149 | 150 | Polar Signals Cloud supports anything in the pprof format, so a process exposing the pprof endpoint explained above can then be scraped as elaborated in the [scraping docs](https://www.polarsignals.com/docs/setup-scraper). 151 | 152 | ## Use from C or C++ 153 | 154 | The functionality to dump the current jemalloc heap profile in pprof 155 | format is exposed to C and C++ (or any other language that can use 156 | jemalloc and can link against libraries via the C ABI). This 157 | functionality is exposed via the `capi` (C API) package. 158 | 159 | ### Building 160 | 161 | The following prerequisites are necessary to build the C API package: 162 | 163 | - Working Rust and C toolchains. The former can be installed by 164 | following the instructions at <https://rustup.rs>. The latter can be 165 | installed via the distribution's package manager. For example, on 166 | Ubuntu, run `sudo apt install build-essential`. 167 | - `jemalloc` and its development headers. For example, on Ubuntu, run 168 | `sudo apt install libjemalloc-dev`. 169 | 170 | Once the prerequisites are installed, the library can be built by 171 | running `make capi`. There are three files of 172 | interest: 173 | 174 | - The library itself, produced at 175 | `target/release/libjemalloc_pprof.so` 176 | - A header file, at `capi/include/jemalloc_pprof.h` 177 | - A manual page, at `capi/man/dump_jemalloc_pprof.3`. 178 | 179 | The procedure for installing and using these files depends on your 180 | distribution and build system. 
181 | 182 | ### Use 183 | 184 | Ensure that your binaries link against both jemalloc and 185 | jemalloc_pprof by passing the linker flags `-ljemalloc 186 | -ljemalloc_pprof`. The procedure for ensuring that these flags are 187 | passed depends on your build system and is currently outside the scope 188 | of this document. 189 | 190 | Once that is done, profiling can be enabled either by setting the 191 | `MALLOC_CONF` variable or by defining a symbol called `malloc_conf` in 192 | the binary. For example: 193 | 194 | ```shell 195 | export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:19" 196 | ``` 197 | 198 | See the `jemalloc` man page for more details. When profiling is 199 | enabled, a profile may be dumped in pprof format via the 200 | `dump_jemalloc_pprof` function. 201 | 202 | ### Example 203 | 204 | This program allocates between 1 and 10 MiB every 100 milliseconds, 205 | and dumps a profile to the file `my_profile` every 2 seconds. 206 | 207 | ```c 208 | #include 209 | #include 210 | #include 211 | #include 212 | #include 213 | #include 214 | 215 | #include 216 | 217 | void 218 | a() 219 | { 220 | size_t sz = 1 * 1024 * 1024; 221 | char *x = malloc(sz); 222 | for (size_t i = 0; i < sz; ++i) { 223 | x[i] = '\0'; 224 | } 225 | } 226 | 227 | void 228 | b() 229 | { 230 | size_t sz = 2 * 1024 * 1024; 231 | char *x = malloc(sz); 232 | for (size_t i = 0; i < sz; ++i) { 233 | x[i] = '\0'; 234 | } 235 | } 236 | 237 | void 238 | c() 239 | { 240 | size_t sz = 3 * 1024 * 1024; 241 | char *x = malloc(sz); 242 | for (size_t i = 0; i < sz; ++i) { 243 | x[i] = '\0'; 244 | } 245 | } 246 | 247 | void 248 | d() 249 | { 250 | size_t sz = 4 * 1024 * 1024; 251 | char *x = malloc(sz); 252 | for (size_t i = 0; i < sz; ++i) { 253 | x[i] = '\0'; 254 | } 255 | } 256 | 257 | void 258 | e() 259 | { 260 | size_t sz = 5 * 1024 * 1024; 261 | char *x = malloc(sz); 262 | for (size_t i = 0; i < sz; ++i) { 263 | x[i] = '\0'; 264 | } 265 | } 266 | 267 | void 268 | f() 269 | { 270 | 
size_t sz = 6 * 1024 * 1024; 271 | char *x = malloc(sz); 272 | for (size_t i = 0; i < sz; ++i) { 273 | x[i] = '\0'; 274 | } 275 | } 276 | 277 | void 278 | g() 279 | { 280 | size_t sz = 7 * 1024 * 1024; 281 | char *x = malloc(sz); 282 | for (size_t i = 0; i < sz; ++i) { 283 | x[i] = '\0'; 284 | } 285 | } 286 | 287 | void 288 | h() 289 | { 290 | size_t sz = 8 * 1024 * 1024; 291 | char *x = malloc(sz); 292 | for (size_t i = 0; i < sz; ++i) { 293 | x[i] = '\0'; 294 | } 295 | } 296 | 297 | void 298 | j() 299 | { 300 | size_t sz = 9 * 1024 * 1024; 301 | char *x = malloc(sz); 302 | for (size_t i = 0; i < sz; ++i) { 303 | x[i] = '\0'; 304 | } 305 | } 306 | 307 | void 308 | k() 309 | { 310 | size_t sz = 10 * 1024 * 1024; 311 | char *x = malloc(sz); 312 | for (size_t i = 0; i < sz; ++i) { 313 | x[i] = '\0'; 314 | } 315 | } 316 | 317 | void * 318 | repeatedly_dump(void *ignored) 319 | { 320 | char *buf; 321 | size_t len = 0; 322 | int result; 323 | for (;;) { 324 | sleep(2); 325 | result = dump_jemalloc_pprof(&buf, &len); 326 | if (result != JP_SUCCESS) { 327 | fprintf(stderr, "errno: %d\n", errno); 328 | continue; 329 | } 330 | if (buf) { 331 | FILE *file = fopen("my_profile", "w"); 332 | assert(file); 333 | 334 | fwrite(buf, sizeof(char), len, file); 335 | fclose(file); 336 | printf("dumped pprof of size %lu\n", len); 337 | free(buf); 338 | } 339 | } 340 | return NULL; 341 | } 342 | 343 | int 344 | main() 345 | { 346 | pthread_t tid; 347 | int result; 348 | 349 | result = pthread_create(&tid, NULL, repeatedly_dump, NULL); 350 | assert(!result); 351 | for (;;) { 352 | usleep(100000); 353 | switch (rand() % 10) { 354 | case 0: 355 | a(); 356 | break; 357 | case 1: 358 | b(); 359 | break; 360 | case 2: 361 | c(); 362 | break; 363 | case 3: 364 | d(); 365 | break; 366 | case 4: 367 | e(); 368 | break; 369 | case 5: 370 | f(); 371 | break; 372 | case 6: 373 | g(); 374 | break; 375 | case 7: 376 | h(); 377 | break; 378 | case 8: 379 | j(); 380 | break; 381 | case 9: 382 | k(); 
383 | break; 384 | } 385 | } 386 | } 387 | ``` 388 | -------------------------------------------------------------------------------- /buf.gen.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | managed: 3 | enabled: true 4 | 5 | plugins: 6 | - plugin: buf.build/community/neoeinstein-prost:v0.2.3 7 | out: src 8 | -------------------------------------------------------------------------------- /buf.work.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | directories: 3 | - proto 4 | -------------------------------------------------------------------------------- /capi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "capi" 3 | version = "0.7.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | crate-type = ["cdylib"] 8 | name = "jemalloc_pprof" 9 | 10 | [dependencies] 11 | libc.workspace = true 12 | util.workspace = true 13 | anyhow.workspace = true 14 | tempfile.workspace = true 15 | mappings.workspace = true 16 | errno.workspace = true 17 | -------------------------------------------------------------------------------- /capi/include/jemalloc_pprof.h: -------------------------------------------------------------------------------- 1 | #ifndef JEMALLOC_PPROF_H 2 | #define JEMALLOC_PPROF_H 3 | 4 | #include 5 | 6 | #define JP_SUCCESS 0 7 | #define JP_FAILURE 1 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | #endif 12 | int dump_jemalloc_pprof(char **buf_out, size_t *n_out); 13 | 14 | #endif // include guard 15 | -------------------------------------------------------------------------------- /capi/man/dump_jemalloc_pprof.3: -------------------------------------------------------------------------------- 1 | .TH dump_jemalloc_pprof 3 "rust-jemalloc-pprof manual" 2 | .SH NAME 3 | dump_jemalloc_pprof \- dump the current jemalloc allocation profile in pprof format 4 | 5 | .SH SYNOPSIS 6 | \fB#include \fR 7 
| 8 | \fBint dump_jemalloc_pprof(\fR\fIunsigned char **buf_out, size_t *n_out\fR\fB);\fR 9 | 10 | .SH DESCRIPTION 11 | The \fBdump_jemalloc_pprof()\fR function is intended to be called from C code to dump the current jemalloc allocation profile in pprof format. It allocates a buffer and stores a pointer to this buffer in \fI*buf_out\fR. The size of the buffer is stored in \fI*n_out\fR. The function returns \fIJP_SUCCESS\fR if the operation succeeds or \fIJP_FAILURE\fR if it fails. In the case of failure, an error code may be stored in \fIerrno\fR if meaningful. 12 | 13 | If \fIJP_FAILURE\fR is returned, the values pointed to by \fIbuf_out\fR and \fIn_out\fR are unspecified. 14 | 15 | This function requires jemalloc profiling to be enabled and active. For more information, see \fBjemalloc(3)\fR. 16 | 17 | .SH SAFETY 18 | This function is marked as unsafe and should not be called directly from Rust code. A corresponding Rust API should be used instead. 19 | 20 | .SH "RETURN VALUES" 21 | .TP 22 | \fBJP_SUCCESS\fR 23 | Indicates that the operation succeeded. 24 | .TP 25 | \fBJP_FAILURE\fR 26 | Indicates that the operation failed. The specific error code can be found in \fIerrno\fR. 27 | 28 | .SH "SEE ALSO" 29 | \fBjemalloc(3)\fR, \fBerrno(3)\fR 30 | 31 | .SH AUTHOR 32 | This library was written by the Polar Signals team. See 33 | https://polarsignals.com for more information. 
34 | -------------------------------------------------------------------------------- /capi/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | use std::io::BufReader; 3 | use std::mem::size_of_val; 4 | use std::os::unix::ffi::OsStrExt; 5 | use std::ptr::null_mut; 6 | 7 | use errno::{set_errno, Errno}; 8 | use libc::{c_char, c_int, c_void, size_t}; 9 | use mappings::MAPPINGS; 10 | use tempfile::NamedTempFile; 11 | use util::parse_jeheap; 12 | 13 | pub const JP_SUCCESS: c_int = 0; 14 | pub const JP_FAILURE: c_int = -1; 15 | 16 | #[link(name = "jemalloc")] 17 | extern "C" { 18 | // int mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); 19 | fn mallctl( 20 | name: *const c_char, 21 | oldp: *mut c_void, 22 | oldlenp: *mut size_t, 23 | newp: *mut c_void, 24 | newlen: size_t, 25 | ) -> c_int; 26 | } 27 | 28 | enum Error { 29 | Io(std::io::Error), 30 | Mallctl(c_int), 31 | ParseProfile(), 32 | } 33 | 34 | impl From for Error { 35 | fn from(e: std::io::Error) -> Self { 36 | Self::Io(e) 37 | } 38 | } 39 | 40 | fn dump_pprof_inner() -> Result, Error> { 41 | let f = NamedTempFile::new()?; 42 | let path = CString::new(f.path().as_os_str().as_bytes().to_vec()).unwrap(); 43 | // SAFETY: "prof.dump" is documented as being writable and taking a C string as input: 44 | // http://jemalloc.net/jemalloc.3.html#prof.dump 45 | let pp = (&mut path.as_ptr()) as *mut _ as *mut _; 46 | let ret = unsafe { 47 | mallctl( 48 | b"prof.dump\0" as *const _ as *const c_char, 49 | null_mut(), 50 | null_mut(), 51 | pp, 52 | size_of_val(&pp), 53 | ) 54 | }; 55 | if ret != 0 { 56 | return Err(Error::Mallctl(ret)); 57 | } 58 | 59 | let dump_reader = BufReader::new(f); 60 | let profile = 61 | parse_jeheap(dump_reader, MAPPINGS.as_deref()).map_err(|_| Error::ParseProfile())?; 62 | let pprof = profile.to_pprof(("inuse_space", "bytes"), ("space", "bytes"), None); 63 | Ok(pprof) 64 | } 65 | 66 | /// Dump 
the current jemalloc heap profile in pprof format. 67 | /// 68 | /// This is intended to be called from C. A buffer is allocated 69 | /// and a pointer to it is stored in `buf_out`; its size is stored in 70 | /// `n_out`. [`JP_SUCCESS`] or [`JP_FAILURE`] is returned according to whether 71 | /// the operation succeeded or failed; an error code is stored in `errno` if it 72 | /// is meaningful to do so. 73 | /// 74 | /// If `JP_FAILURE` is returned, the values pointed to by `buf_out` and `n_out` 75 | /// are unspecified. 76 | /// 77 | /// # Safety 78 | /// 79 | /// You probably don't want to call this from Rust. 80 | /// Use the Rust API instead. 81 | #[no_mangle] 82 | pub unsafe extern "C" fn dump_jemalloc_pprof(buf_out: *mut *mut u8, n_out: *mut size_t) -> c_int { 83 | let buf = match dump_pprof_inner() { 84 | Ok(buf) => buf, 85 | Err(Error::Io(e)) if e.raw_os_error().is_some() => { 86 | set_errno(Errno(e.raw_os_error().unwrap())); 87 | return JP_FAILURE; 88 | } 89 | Err(Error::Mallctl(i)) => { 90 | set_errno(Errno(i)); 91 | return JP_FAILURE; 92 | } 93 | // TODO - maybe some of these can have errnos 94 | Err(_) => { 95 | return JP_FAILURE; 96 | } 97 | }; 98 | 99 | // Disable clippy warning. 100 | // usize is defined to be the same as uintptr_t (AKA have the same representation as a pointer), 101 | // which is different from size_t, which is the maximum size of an array. 102 | // This is not usually an issue, except on some platforms like CHERI which store extra information in the pointer. 103 | // On those platforms, usize will be 128 bits, while size_t is 64 bit.
104 | #[allow(clippy::useless_conversion)] 105 | let len: size_t = buf.len().try_into().expect("absurd length"); 106 | let p = if len > 0 { 107 | // leak is ok, consumer is responsible for freeing 108 | buf.leak().as_mut_ptr() 109 | } else { 110 | null_mut() 111 | }; 112 | unsafe { 113 | if !buf_out.is_null() { 114 | std::ptr::write(buf_out, p); 115 | } 116 | if !n_out.is_null() { 117 | std::ptr::write(n_out, len); 118 | } 119 | } 120 | JP_SUCCESS 121 | } 122 | -------------------------------------------------------------------------------- /example/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-jemalloc-pprof-example" 3 | version = "0.1.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [dependencies] 8 | jemalloc_pprof = { path = ".." 
} 9 | tokio = { version = "1", features = ["full"] } 10 | axum = "0.7.2" 11 | [target.'cfg(not(target_env = "msvc"))'.dependencies] 12 | tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms", "background_threads"] } 13 | 14 | [features] 15 | flamegraph = ["jemalloc_pprof/flamegraph"] -------------------------------------------------------------------------------- /example/src/main.rs: -------------------------------------------------------------------------------- 1 | use axum::http::StatusCode; 2 | use axum::response::IntoResponse; 3 | 4 | #[cfg(not(target_env = "msvc"))] 5 | #[global_allocator] 6 | static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; 7 | 8 | #[allow(non_upper_case_globals)] 9 | #[export_name = "malloc_conf"] 10 | pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0"; 11 | 12 | #[tokio::main] 13 | async fn main() { 14 | let mut v = vec![]; 15 | for i in 0..1000000 { 16 | v.push(i); 17 | } 18 | 19 | let app = axum::Router::new().route("/debug/pprof/heap", axum::routing::get(handle_get_heap)); 20 | 21 | // Add a flamegraph SVG route if enabled via `cargo run -F flamegraph`. 
22 | #[cfg(feature = "flamegraph")] 23 | let app = app.route( 24 | "/debug/pprof/heap/flamegraph", 25 | axum::routing::get(handle_get_heap_flamegraph), 26 | ); 27 | 28 | // run our app with hyper, listening globally on port 3000 29 | let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap(); 30 | axum::serve(listener, app).await.unwrap(); 31 | } 32 | 33 | pub async fn handle_get_heap() -> Result { 34 | let mut prof_ctl = jemalloc_pprof::PROF_CTL.as_ref().unwrap().lock().await; 35 | require_profiling_activated(&prof_ctl)?; 36 | let pprof = prof_ctl 37 | .dump_pprof() 38 | .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))?; 39 | Ok(pprof) 40 | } 41 | 42 | #[cfg(feature = "flamegraph")] 43 | pub async fn handle_get_heap_flamegraph() -> Result { 44 | use axum::body::Body; 45 | use axum::http::header::CONTENT_TYPE; 46 | use axum::response::Response; 47 | 48 | let mut prof_ctl = jemalloc_pprof::PROF_CTL.as_ref().unwrap().lock().await; 49 | require_profiling_activated(&prof_ctl)?; 50 | let svg = prof_ctl 51 | .dump_flamegraph() 52 | .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))?; 53 | Response::builder() 54 | .header(CONTENT_TYPE, "image/svg+xml") 55 | .body(Body::from(svg)) 56 | .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string())) 57 | } 58 | 59 | /// Checks whether jemalloc profiling is activated and returns an error response if not.
60 | fn require_profiling_activated( 61 | prof_ctl: &jemalloc_pprof::JemallocProfCtl, 62 | ) -> Result<(), (axum::http::StatusCode, String)> { 63 | if prof_ctl.activated() { 64 | Ok(()) 65 | } else { 66 | Err(( 67 | axum::http::StatusCode::FORBIDDEN, 68 | "heap profiling not activated".into(), 69 | )) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /mappings/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mappings" 3 | version = "0.7.0" 4 | edition = "2021" 5 | description = "Get the mappings of a process (currently only on Linux)" 6 | publish = true 7 | license = "Apache-2.0" 8 | repository = "https://github.com/polarsignals/rust-jemalloc-pprof" 9 | homepage = "https://crates.io/crates/mappings" 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | anyhow.workspace = true 15 | libc.workspace = true 16 | once_cell.workspace = true 17 | tracing.workspace = true 18 | util.workspace = true 19 | -------------------------------------------------------------------------------- /mappings/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright Materialize, Inc. and contributors. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License in the LICENSE file at the 6 | // root of this repository, or online at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | //! Linux-specific process introspection. 17 | 18 | //! Utility crate to extract information about the running process. 19 | //! 20 | //! Currently only works on Linux. 21 | use std::path::PathBuf; 22 | 23 | use once_cell::sync::Lazy; 24 | use tracing::error; 25 | 26 | use util::{BuildId, Mapping}; 27 | 28 | #[cfg(target_os = "linux")] 29 | mod enabled { 30 | use std::ffi::{CStr, OsStr}; 31 | use std::os::unix::ffi::OsStrExt; 32 | use std::path::PathBuf; 33 | use std::str::FromStr; 34 | 35 | use anyhow::Context; 36 | use libc::{ 37 | c_int, c_void, dl_iterate_phdr, dl_phdr_info, size_t, Elf64_Word, PT_LOAD, PT_NOTE, 38 | }; 39 | 40 | use util::{BuildId, CastFrom}; 41 | 42 | use crate::LoadedSegment; 43 | 44 | use super::SharedObject; 45 | 46 | /// Collects information about all shared objects loaded into the current 47 | /// process, including the main program binary as well as all dynamically loaded 48 | /// libraries. Intended to be useful for profilers, who can use this information 49 | /// to symbolize stack traces offline. 50 | /// 51 | /// Uses `dl_iterate_phdr` to walk all shared objects and extract the wanted 52 | /// information from their program headers. 53 | /// 54 | /// SAFETY: This function is written in a hilariously unsafe way: it involves 55 | /// following pointers to random parts of memory, and then assuming that 56 | /// particular structures can be found there. However, it was written by 57 | /// carefully reading `man dl_iterate_phdr` and `man elf`, and is thus intended 58 | /// to be relatively safe for callers to use. Assuming I haven't written any 59 | /// bugs (and that the documentation is correct), the only known safety 60 | /// requirements are: 61 | /// 62 | /// (1) The running binary must be in ELF format and running on Linux. 
63 | pub unsafe fn collect_shared_objects() -> Result, anyhow::Error> { 64 | let mut state = CallbackState { 65 | result: Ok(Vec::new()), 66 | }; 67 | let state_ptr = std::ptr::addr_of_mut!(state).cast(); 68 | 69 | // SAFETY: `dl_iterate_phdr` has no documented restrictions on when 70 | // it can be called. 71 | unsafe { dl_iterate_phdr(Some(iterate_cb), state_ptr) }; 72 | 73 | state.result 74 | } 75 | 76 | struct CallbackState { 77 | result: Result, anyhow::Error>, 78 | } 79 | 80 | impl CallbackState { 81 | fn is_first(&self) -> bool { 82 | match &self.result { 83 | Ok(v) => v.is_empty(), 84 | Err(_) => false, 85 | } 86 | } 87 | } 88 | 89 | const CB_RESULT_OK: c_int = 0; 90 | const CB_RESULT_ERROR: c_int = -1; 91 | 92 | unsafe extern "C" fn iterate_cb( 93 | info: *mut dl_phdr_info, 94 | _size: size_t, 95 | data: *mut c_void, 96 | ) -> c_int { 97 | let state: *mut CallbackState = data.cast(); 98 | 99 | // SAFETY: `data` is a pointer to a `CallbackState`, and no mutable reference 100 | // aliases with it in Rust. Furthermore, `dl_iterate_phdr` doesn't do anything 101 | // with `data` other than pass it to this callback, so nothing will be mutating 102 | // the object it points to while we're inside here. 103 | assert_pointer_valid(state); 104 | let state = unsafe { state.as_mut() }.expect("pointer is valid"); 105 | 106 | // SAFETY: similarly, `dl_iterate_phdr` isn't mutating `info` 107 | // while we're here. 108 | assert_pointer_valid(info); 109 | let info = unsafe { info.as_ref() }.expect("pointer is valid"); 110 | 111 | let base_address = usize::cast_from(info.dlpi_addr); 112 | 113 | let path_name = if state.is_first() { 114 | // From `man dl_iterate_phdr`: 115 | // "The first object visited by callback is the main program. For the main 116 | // program, the dlpi_name field will be an empty string." 
117 | match current_exe().context("failed to read the name of the current executable") { 118 | Ok(pb) => pb, 119 | Err(e) => { 120 | // Profiles will be of dubious usefulness 121 | // if we can't get the build ID for the main executable, 122 | // so just bail here. 123 | state.result = Err(e); 124 | return CB_RESULT_ERROR; 125 | } 126 | } 127 | } else if info.dlpi_name.is_null() { 128 | // This would be unexpected, but let's handle this case gracefully by skipping this object. 129 | return CB_RESULT_OK; 130 | } else { 131 | // SAFETY: `dl_iterate_phdr` documents this as being a null-terminated string. 132 | assert_pointer_valid(info.dlpi_name); 133 | let name = unsafe { CStr::from_ptr(info.dlpi_name) }; 134 | 135 | OsStr::from_bytes(name.to_bytes()).into() 136 | }; 137 | 138 | // Walk the headers of this image, looking for `PT_LOAD` and `PT_NOTE` segments. 139 | let mut loaded_segments = Vec::new(); 140 | let mut build_id = None; 141 | 142 | // SAFETY: `dl_iterate_phdr` is documented as setting `dlpi_phnum` to the 143 | // length of the array pointed to by `dlpi_phdr`. 144 | assert_pointer_valid(info.dlpi_phdr); 145 | let program_headers = 146 | unsafe { std::slice::from_raw_parts(info.dlpi_phdr, info.dlpi_phnum.into()) }; 147 | 148 | for ph in program_headers { 149 | if ph.p_type == PT_LOAD { 150 | loaded_segments.push(LoadedSegment { 151 | file_offset: u64::cast_from(ph.p_offset), 152 | memory_offset: usize::cast_from(ph.p_vaddr), 153 | memory_size: usize::cast_from(ph.p_memsz), 154 | }); 155 | } else if ph.p_type == PT_NOTE { 156 | // From `man elf`: 157 | // typedef struct { 158 | // Elf64_Word n_namesz; 159 | // Elf64_Word n_descsz; 160 | // Elf64_Word n_type; 161 | // } Elf64_Nhdr; 162 | #[repr(C)] 163 | struct NoteHeader { 164 | n_namesz: Elf64_Word, 165 | n_descsz: Elf64_Word, 166 | n_type: Elf64_Word, 167 | } 168 | // This is how `man dl_iterate_phdr` says to find the 169 | // segment headers in memory. 
170 | // 171 | // Note - it seems on some old 172 | // versions of Linux (I observed it on CentOS 7), 173 | // `p_vaddr` can be negative, so we use wrapping add here 174 | let mut offset = usize::cast_from(ph.p_vaddr.wrapping_add(info.dlpi_addr)); 175 | let orig_offset = offset; 176 | 177 | const NT_GNU_BUILD_ID: Elf64_Word = 3; 178 | const GNU_NOTE_NAME: &[u8; 4] = b"GNU\0"; 179 | const ELF_NOTE_STRING_ALIGN: usize = 4; 180 | 181 | while offset + std::mem::size_of::() + GNU_NOTE_NAME.len() 182 | <= orig_offset + usize::cast_from(ph.p_memsz) 183 | { 184 | // Justification: Our logic for walking this header 185 | // follows exactly the code snippet in the 186 | // `Notes (Nhdr)` section of `man elf`, 187 | // so `offset` will always point to a `NoteHeader` 188 | // (called `Elf64_Nhdr` in that document) 189 | #[allow(clippy::as_conversions)] 190 | let nh_ptr = offset as *const NoteHeader; 191 | 192 | // SAFETY: Iterating according to the `Notes (Nhdr)` 193 | // section of `man elf` ensures that this pointer is 194 | // aligned. The offset check above ensures that it 195 | // is in-bounds. 196 | assert_pointer_valid(nh_ptr); 197 | let nh = unsafe { nh_ptr.as_ref() }.expect("pointer is valid"); 198 | 199 | // from elf.h 200 | if nh.n_type == NT_GNU_BUILD_ID 201 | && nh.n_descsz != 0 202 | && usize::cast_from(nh.n_namesz) == GNU_NOTE_NAME.len() 203 | { 204 | // Justification: since `n_namesz` is 4, the name is a four-byte value. 205 | #[allow(clippy::as_conversions)] 206 | let p_name = (offset + std::mem::size_of::()) as *const [u8; 4]; 207 | 208 | // SAFETY: since `n_namesz` is 4, the name is a four-byte value. 209 | assert_pointer_valid(p_name); 210 | let name = unsafe { p_name.as_ref() }.expect("pointer is valid"); 211 | 212 | if name == GNU_NOTE_NAME { 213 | // We found what we're looking for! 
214 | // Justification: simple pointer arithmetic 215 | #[allow(clippy::as_conversions)] 216 | let p_desc = (p_name as usize + 4) as *const u8; 217 | 218 | // SAFETY: This is the documented meaning of `n_descsz`. 219 | assert_pointer_valid(p_desc); 220 | let desc = unsafe { 221 | std::slice::from_raw_parts(p_desc, usize::cast_from(nh.n_descsz)) 222 | }; 223 | 224 | build_id = Some(BuildId(desc.to_vec())); 225 | break; 226 | } 227 | } 228 | offset = offset 229 | + std::mem::size_of::() 230 | + align_up::(usize::cast_from(nh.n_namesz)) 231 | + align_up::(usize::cast_from(nh.n_descsz)); 232 | } 233 | } 234 | } 235 | 236 | let objects = state.result.as_mut().expect("we return early on errors"); 237 | objects.push(SharedObject { 238 | base_address, 239 | path_name, 240 | build_id, 241 | loaded_segments, 242 | }); 243 | 244 | CB_RESULT_OK 245 | } 246 | 247 | /// Increases `p` as little as possible (including possibly 0) 248 | /// such that it becomes a multiple of `N`. 249 | pub const fn align_up(p: usize) -> usize { 250 | if p % N == 0 { 251 | p 252 | } else { 253 | p + (N - (p % N)) 254 | } 255 | } 256 | 257 | /// Asserts that the given pointer is valid. 258 | /// 259 | /// # Panics 260 | /// 261 | /// Panics if the given pointer: 262 | /// * is a null pointer 263 | /// * is not properly aligned for `T` 264 | fn assert_pointer_valid(ptr: *const T) { 265 | // No other known way to convert a pointer to `usize`. 266 | #[allow(clippy::as_conversions)] 267 | let address = ptr as usize; 268 | let align = std::mem::align_of::(); 269 | 270 | assert!(!ptr.is_null()); 271 | assert!(address % align == 0, "unaligned pointer"); 272 | } 273 | 274 | fn current_exe_from_dladdr() -> Result { 275 | let progname = unsafe { 276 | let mut dlinfo = std::mem::MaybeUninit::uninit(); 277 | 278 | // This should set the filepath of the current executable 279 | // because it must contain the function pointer of itself. 
280 | let ret = libc::dladdr( 281 | current_exe_from_dladdr as *const libc::c_void, 282 | dlinfo.as_mut_ptr(), 283 | ); 284 | if ret == 0 { 285 | anyhow::bail!("dladdr failed"); 286 | } 287 | CStr::from_ptr(dlinfo.assume_init().dli_fname).to_str()? 288 | }; 289 | 290 | Ok(PathBuf::from_str(progname)?) 291 | } 292 | 293 | /// Get the name of the current executable by dladdr and fall back to std::env::current_exe 294 | /// if it fails. Try dladdr first because it returns the actual exe even when it's invoked 295 | /// by ld.so. 296 | fn current_exe() -> Result { 297 | match current_exe_from_dladdr() { 298 | Ok(path) => Ok(path), 299 | Err(e) => { 300 | // when failed to get current exe from dladdr, fall back to the conventional way 301 | std::env::current_exe().context(e) 302 | } 303 | } 304 | } 305 | } 306 | 307 | /// Mappings of the processes' executable and shared libraries. 308 | #[cfg(target_os = "linux")] 309 | pub static MAPPINGS: Lazy>> = Lazy::new(|| { 310 | /// Build a list of mappings for the passed shared objects. 311 | fn build_mappings(objects: &[SharedObject]) -> Vec { 312 | let mut mappings = Vec::new(); 313 | for object in objects { 314 | for segment in &object.loaded_segments { 315 | // I have observed that `memory_offset` can be negative on some very old 316 | // versions of Linux (e.g. CentOS 7), so use wrapping add here. 
317 | let memory_start = object.base_address.wrapping_add(segment.memory_offset); 318 | mappings.push(Mapping { 319 | memory_start, 320 | memory_end: memory_start + segment.memory_size, 321 | memory_offset: segment.memory_offset, 322 | file_offset: segment.file_offset, 323 | pathname: object.path_name.clone(), 324 | build_id: object.build_id.clone(), 325 | }); 326 | } 327 | } 328 | mappings 329 | } 330 | 331 | // SAFETY: We are on Linux 332 | match unsafe { enabled::collect_shared_objects() } { 333 | Ok(objects) => Some(build_mappings(&objects)), 334 | Err(err) => { 335 | error!("build ID fetching failed: {err}"); 336 | None 337 | } 338 | } 339 | }); 340 | 341 | #[cfg(not(target_os = "linux"))] 342 | pub static MAPPINGS: Lazy>> = Lazy::new(|| { 343 | error!("build ID fetching is only supported on Linux"); 344 | None 345 | }); 346 | 347 | /// Information about a shared object loaded into the current process. 348 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 349 | pub struct SharedObject { 350 | /// The address at which the object is loaded. 351 | pub base_address: usize, 352 | /// The path of the file the object was loaded from. 353 | pub path_name: PathBuf, 354 | /// The build ID of the object, if found. 355 | pub build_id: Option, 356 | /// Loaded segments of the object. 357 | pub loaded_segments: Vec, 358 | } 359 | 360 | /// A segment of a shared object that's loaded into memory. 361 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 362 | pub struct LoadedSegment { 363 | /// Offset of the segment in the source file. 364 | pub file_offset: u64, 365 | /// Offset to the `SharedObject`'s `base_address`. 366 | pub memory_offset: usize, 367 | /// Size of the segment in memory. 368 | pub memory_size: usize, 369 | } 370 | -------------------------------------------------------------------------------- /proto/google/pprof/profile.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc.
All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Profile is a common stacktrace profile format. 16 | // 17 | // Measurements represented with this format should follow the 18 | // following conventions: 19 | // 20 | // - Consumers should treat unset optional fields as if they had been 21 | // set with their default value. 22 | // 23 | // - When possible, measurements should be stored in "unsampled" form 24 | // that is most useful to humans. There should be enough 25 | // information present to determine the original sampled values. 26 | // 27 | // - On-disk, the serialized proto must be gzip-compressed. 28 | // 29 | // - The profile is represented as a set of samples, where each sample 30 | // references a sequence of locations, and where each location belongs 31 | // to a mapping. 32 | // - There is a N->1 relationship from sample.location_id entries to 33 | // locations. For every sample.location_id entry there must be a 34 | // unique Location with that id. 35 | // - There is an optional N->1 relationship from locations to 36 | // mappings. For every nonzero Location.mapping_id there must be a 37 | // unique Mapping with that id. 
38 | 39 | syntax = "proto3"; 40 | 41 | package perftools.profiles; 42 | 43 | option java_outer_classname = "ProfileProto"; 44 | option java_package = "com.google.perftools.profiles"; 45 | 46 | message Profile { 47 | // A description of the samples associated with each Sample.value. 48 | // For a cpu profile this might be: 49 | // [["cpu","nanoseconds"]] or [["wall","seconds"]] or [["syscall","count"]] 50 | // For a heap profile, this might be: 51 | // [["allocations","count"], ["space","bytes"]], 52 | // If one of the values represents the number of events represented 53 | // by the sample, by convention it should be at index 0 and use 54 | // sample_type.unit == "count". 55 | repeated ValueType sample_type = 1; 56 | // The set of samples recorded in this profile. 57 | repeated Sample sample = 2; 58 | // Mapping from address ranges to the image/binary/library mapped 59 | // into that address range. mapping[0] will be the main binary. 60 | repeated Mapping mapping = 3; 61 | // Useful program location 62 | repeated Location location = 4; 63 | // Functions referenced by locations 64 | repeated Function function = 5; 65 | // A common table for strings referenced by various messages. 66 | // string_table[0] must always be "". 67 | repeated string string_table = 6; 68 | // frames with Function.function_name fully matching the following 69 | // regexp will be dropped from the samples, along with their successors. 70 | int64 drop_frames = 7; // Index into string table. 71 | // frames with Function.function_name fully matching the following 72 | // regexp will be kept, even if it matches drop_frames. 73 | int64 keep_frames = 8; // Index into string table. 74 | 75 | // The following fields are informational, do not affect 76 | // interpretation of results. 77 | 78 | // Time of collection (UTC) represented as nanoseconds past the epoch. 79 | int64 time_nanos = 9; 80 | // Duration of the profile, if a duration makes sense. 
81 | int64 duration_nanos = 10; 82 | // The kind of events between sampled occurrences. 83 | // e.g [ "cpu","cycles" ] or [ "heap","bytes" ] 84 | ValueType period_type = 11; 85 | // The number of events between sampled occurrences. 86 | int64 period = 12; 87 | // Freeform text associated to the profile. 88 | repeated int64 comment = 13; // Indices into string table. 89 | // Index into the string table of the type of the preferred sample 90 | // value. If unset, clients should default to the last sample value. 91 | int64 default_sample_type = 14; 92 | } 93 | 94 | // ValueType describes the semantics and measurement units of a value. 95 | message ValueType { 96 | int64 type = 1; // Index into string table. 97 | int64 unit = 2; // Index into string table. 98 | } 99 | 100 | // Each Sample records values encountered in some program 101 | // context. The program context is typically a stack trace, perhaps 102 | // augmented with auxiliary information like the thread-id, some 103 | // indicator of a higher level request being handled etc. 104 | message Sample { 105 | // The ids recorded here correspond to a Profile.location.id. 106 | // The leaf is at location_id[0]. 107 | repeated uint64 location_id = 1; 108 | // The type and unit of each value is defined by the corresponding 109 | // entry in Profile.sample_type. All samples must have the same 110 | // number of values, the same as the length of Profile.sample_type. 111 | // When aggregating multiple samples into a single sample, the 112 | // result has a list of values that is the element-wise sum of the 113 | // lists of the originals. 114 | repeated int64 value = 2; 115 | // label includes additional context for this sample.
It can include 116 | // things like a thread id, allocation size, etc 117 | repeated Label label = 3; 118 | } 119 | 120 | message Label { 121 | int64 key = 1; // Index into string table 122 | 123 | // At most one of the following must be present 124 | int64 str = 2; // Index into string table 125 | int64 num = 3; 126 | 127 | // Should only be present when num is present. 128 | // Specifies the units of num. 129 | // Use arbitrary string (for example, "requests") as a custom count unit. 130 | // If no unit is specified, consumer may apply heuristic to deduce the unit. 131 | // Consumers may also interpret units like "bytes" and "kilobytes" as memory 132 | // units and units like "seconds" and "nanoseconds" as time units, 133 | // and apply appropriate unit conversions to these. 134 | int64 num_unit = 4; // Index into string table 135 | } 136 | 137 | message Mapping { 138 | // Unique nonzero id for the mapping. 139 | uint64 id = 1; 140 | // Address at which the binary (or DLL) is loaded into memory. 141 | uint64 memory_start = 2; 142 | // The limit of the address range occupied by this mapping. 143 | uint64 memory_limit = 3; 144 | // Offset in the binary that corresponds to the first mapped address. 145 | uint64 file_offset = 4; 146 | // The object this entry is loaded from. This can be a filename on 147 | // disk for the main binary and shared libraries, or virtual 148 | // abstractions like "[vdso]". 149 | int64 filename = 5; // Index into string table 150 | // A string that uniquely identifies a particular program version 151 | // with high probability. E.g., for binaries generated by GNU tools, 152 | // it could be the contents of the .note.gnu.build-id field. 153 | int64 build_id = 6; // Index into string table 154 | 155 | // The following fields indicate the resolution of symbolic info. 
156 | bool has_functions = 7; 157 | bool has_filenames = 8; 158 | bool has_line_numbers = 9; 159 | bool has_inline_frames = 10; 160 | } 161 | 162 | // Describes function and line table debug information. 163 | message Location { 164 | // Unique nonzero id for the location. A profile could use 165 | // instruction addresses or any integer sequence as ids. 166 | uint64 id = 1; 167 | // The id of the corresponding profile.Mapping for this location. 168 | // It can be unset if the mapping is unknown or not applicable for 169 | // this profile type. 170 | uint64 mapping_id = 2; 171 | // The instruction address for this location, if available. It 172 | // should be within [Mapping.memory_start...Mapping.memory_limit] 173 | // for the corresponding mapping. A non-leaf address may be in the 174 | // middle of a call instruction. It is up to display tools to find 175 | // the beginning of the instruction if necessary. 176 | uint64 address = 3; 177 | // Multiple line indicates this location has inlined functions, 178 | // where the last entry represents the caller into which the 179 | // preceding entries were inlined. 180 | // 181 | // E.g., if memcpy() is inlined into printf: 182 | // line[0].function_name == "memcpy" 183 | // line[1].function_name == "printf" 184 | repeated Line line = 4; 185 | // Provides an indication that multiple symbols map to this location's 186 | // address, for example due to identical code folding by the linker. In that 187 | // case the line information above represents one of the multiple 188 | // symbols. This field must be recomputed when the symbolization state of the 189 | // profile changes. 190 | bool is_folded = 5; 191 | } 192 | 193 | message Line { 194 | // The id of the corresponding profile.Function for this line. 195 | uint64 function_id = 1; 196 | // Line number in source code. 197 | int64 line = 2; 198 | } 199 | 200 | message Function { 201 | // Unique nonzero id for the function. 
202 | uint64 id = 1; 203 | // Name of the function, in human-readable form if available. 204 | int64 name = 2; // Index into string table 205 | // Name of the function, as identified by the system. 206 | // For instance, it can be a C++ mangled name. 207 | int64 system_name = 3; // Index into string table 208 | // Source file containing the function. 209 | int64 filename = 4; // Index into string table 210 | // Line number in source file. 211 | int64 start_line = 5; 212 | } 213 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | /* 2 | based on 3 | https://discourse.nixos.org/t/how-can-i-set-up-my-rust-programming-environment/4501/9 4 | */ 5 | let 6 | rust_overlay = import (builtins.fetchTarball "https://github.com/oxalica/rust-overlay/archive/master.tar.gz"); 7 | pkgs = import { overlays = [ rust_overlay ]; }; 8 | rustVersion = "1.74.1"; 9 | rust = pkgs.rust-bin.stable.${rustVersion}.default.override { 10 | extensions = [ 11 | "rust-src" # for rust-analyzer 12 | ]; 13 | }; 14 | in 15 | pkgs.mkShell { 16 | buildInputs = [ 17 | rust 18 | ] ++ (with pkgs; [ 19 | rust-analyzer 20 | pkg-config 21 | ]); 22 | RUST_BACKTRACE = 1; 23 | } 24 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright Materialize, Inc. and contributors. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License in the LICENSE file at the 6 | // root of this repository, or online at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #![doc = include_str!("../README.md")] 17 | 18 | use std::ffi::CString; 19 | 20 | use std::io::BufReader; 21 | use std::sync::Arc; 22 | use std::time::Instant; 23 | 24 | use libc::size_t; 25 | use once_cell::sync::Lazy; 26 | 27 | use mappings::MAPPINGS; 28 | use tempfile::NamedTempFile; 29 | use tikv_jemalloc_ctl::raw; 30 | use tokio::sync::Mutex; 31 | 32 | #[cfg(feature = "flamegraph")] 33 | pub use util::FlamegraphOptions; 34 | use util::{parse_jeheap, ProfStartTime}; 35 | 36 | /// Activate jemalloc profiling. 37 | pub async fn activate_jemalloc_profiling() { 38 | let Some(ctl) = PROF_CTL.as_ref() else { 39 | tracing::warn!("jemalloc profiling is disabled and cannot be activated"); 40 | return; 41 | }; 42 | 43 | let mut ctl = ctl.lock().await; 44 | if ctl.activated() { 45 | return; 46 | } 47 | 48 | match ctl.activate() { 49 | Ok(()) => tracing::info!("jemalloc profiling activated"), 50 | Err(err) => tracing::warn!("could not activate jemalloc profiling: {err}"), 51 | } 52 | } 53 | 54 | /// Deactivate jemalloc profiling. 
55 | pub async fn deactivate_jemalloc_profiling() { 56 | let Some(ctl) = PROF_CTL.as_ref() else { 57 | return; // jemalloc not enabled 58 | }; 59 | 60 | let mut ctl = ctl.lock().await; 61 | if !ctl.activated() { 62 | return; 63 | } 64 | 65 | match ctl.deactivate() { 66 | Ok(()) => tracing::info!("jemalloc profiling deactivated"), 67 | Err(err) => tracing::warn!("could not deactivate jemalloc profiling: {err}"), 68 | } 69 | } 70 | 71 | /// Per-process singleton for controlling jemalloc profiling. 72 | pub static PROF_CTL: Lazy>>> = 73 | Lazy::new(|| JemallocProfCtl::get().map(|ctl| Arc::new(Mutex::new(ctl)))); 74 | 75 | /// Metadata about a jemalloc heap profiler. 76 | #[derive(Copy, Clone, Debug)] 77 | pub struct JemallocProfMetadata { 78 | pub start_time: Option, 79 | } 80 | 81 | /// A handle to control jemalloc profiling. 82 | #[derive(Debug)] 83 | pub struct JemallocProfCtl { 84 | md: JemallocProfMetadata, 85 | } 86 | 87 | impl JemallocProfCtl { 88 | // Creates and returns the global singleton. 89 | fn get() -> Option { 90 | // SAFETY: "opt.prof" is documented as being readable and returning a bool: 91 | // http://jemalloc.net/jemalloc.3.html#opt.prof 92 | let prof_enabled: bool = unsafe { raw::read(b"opt.prof\0") }.unwrap(); 93 | if prof_enabled { 94 | // SAFETY: "opt.prof_active" is documented as being readable and returning a bool: 95 | // http://jemalloc.net/jemalloc.3.html#opt.prof_active 96 | let prof_active: bool = unsafe { raw::read(b"opt.prof_active\0") }.unwrap(); 97 | let start_time = if prof_active { 98 | Some(ProfStartTime::TimeImmemorial) 99 | } else { 100 | None 101 | }; 102 | let md = JemallocProfMetadata { start_time }; 103 | Some(Self { md }) 104 | } else { 105 | None 106 | } 107 | } 108 | 109 | /// Returns the base 2 logarithm of the sample rate (average interval, in bytes, between allocation samples). 
110 | pub fn lg_sample(&self) -> size_t { 111 | // SAFETY: "prof.lg_sample" is documented as being readable and returning size_t: 112 | // https://jemalloc.net/jemalloc.3.html#opt.lg_prof_sample 113 | unsafe { raw::read(b"prof.lg_sample\0") }.unwrap() 114 | } 115 | 116 | /// Returns the metadata of the profiler. 117 | pub fn get_md(&self) -> JemallocProfMetadata { 118 | self.md 119 | } 120 | 121 | /// Returns whether the profiler is active. 122 | pub fn activated(&self) -> bool { 123 | self.md.start_time.is_some() 124 | } 125 | 126 | /// Activate the profiler and if unset, set the start time to the current time. 127 | pub fn activate(&mut self) -> Result<(), tikv_jemalloc_ctl::Error> { 128 | // SAFETY: "prof.active" is documented as being writable and taking a bool: 129 | // http://jemalloc.net/jemalloc.3.html#prof.active 130 | unsafe { raw::write(b"prof.active\0", true) }?; 131 | if self.md.start_time.is_none() { 132 | self.md.start_time = Some(ProfStartTime::Instant(Instant::now())); 133 | } 134 | Ok(()) 135 | } 136 | 137 | /// Deactivate the profiler. 138 | pub fn deactivate(&mut self) -> Result<(), tikv_jemalloc_ctl::Error> { 139 | // SAFETY: "prof.active" is documented as being writable and taking a bool: 140 | // http://jemalloc.net/jemalloc.3.html#prof.active 141 | unsafe { raw::write(b"prof.active\0", false) }?; 142 | let rate = self.lg_sample(); 143 | // SAFETY: "prof.reset" is documented as being writable and taking a size_t: 144 | // http://jemalloc.net/jemalloc.3.html#prof.reset 145 | unsafe { raw::write(b"prof.reset\0", rate) }?; 146 | 147 | self.md.start_time = None; 148 | Ok(()) 149 | } 150 | 151 | /// Dump a profile into a temporary file and return it. 
152 | pub fn dump(&mut self) -> anyhow::Result { 153 | let f = NamedTempFile::new()?; 154 | let path = CString::new(f.path().as_os_str().as_encoded_bytes()).unwrap(); 155 | 156 | // SAFETY: "prof.dump" is documented as being writable and taking a C string as input: 157 | // http://jemalloc.net/jemalloc.3.html#prof.dump 158 | unsafe { raw::write(b"prof.dump\0", path.as_ptr()) }?; 159 | Ok(f.into_file()) 160 | } 161 | 162 | /// Dump a profile in pprof format (gzipped protobuf) and 163 | /// return a buffer with its contents. 164 | pub fn dump_pprof(&mut self) -> anyhow::Result> { 165 | let f = self.dump()?; 166 | let dump_reader = BufReader::new(f); 167 | let profile = parse_jeheap(dump_reader, MAPPINGS.as_deref())?; 168 | let pprof = profile.to_pprof(("inuse_space", "bytes"), ("space", "bytes"), None); 169 | Ok(pprof) 170 | } 171 | 172 | /// Dump a profile flamegraph in SVG format. 173 | #[cfg(feature = "flamegraph")] 174 | pub fn dump_flamegraph(&mut self) -> anyhow::Result> { 175 | let mut opts = FlamegraphOptions::default(); 176 | opts.title = "inuse_space".to_string(); 177 | opts.count_name = "bytes".to_string(); 178 | self.dump_flamegraph_with_options(&mut opts) 179 | } 180 | 181 | /// Dump a profile flamegraph in SVG format with the given options. 
182 | #[cfg(feature = "flamegraph")] 183 | pub fn dump_flamegraph_with_options( 184 | &mut self, 185 | opts: &mut FlamegraphOptions, 186 | ) -> anyhow::Result> { 187 | let f = self.dump()?; 188 | let dump_reader = BufReader::new(f); 189 | let profile = parse_jeheap(dump_reader, MAPPINGS.as_deref())?; 190 | profile.to_flamegraph(opts) 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /util/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pprof_util" 3 | version = "0.7.0" 4 | edition = "2021" 5 | description = "various utilities for representing and manipulating profiling data" 6 | publish = true 7 | license = "Apache-2.0" 8 | repository = "https://github.com/polarsignals/rust-jemalloc-pprof" 9 | homepage = "https://crates.io/crates/pprof_util" 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | flate2.workspace = true 15 | prost.workspace = true 16 | anyhow.workspace = true 17 | num.workspace = true 18 | paste.workspace = true 19 | backtrace = { workspace = true, optional = true } 20 | inferno = { workspace = true, optional = true } 21 | 22 | [features] 23 | flamegraph = ["symbolize", "dep:inferno"] 24 | symbolize = ["dep:backtrace"] 25 | -------------------------------------------------------------------------------- /util/src/cast.rs: -------------------------------------------------------------------------------- 1 | // Copyright Materialize, Inc. and contributors. All rights reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License in the LICENSE file at the 6 | // root of this repository, or online at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | //! Cast utilities. 17 | 18 | use num::traits::bounds::UpperBounded; 19 | use num::Signed; 20 | use std::error::Error; 21 | use std::fmt; 22 | use std::ops::Deref; 23 | 24 | /// A trait for safe, simple, and infallible casts. 25 | /// 26 | /// `CastFrom` is like [`std::convert::From`], but it is implemented for some 27 | /// platform-specific casts that are missing from the standard library. For 28 | /// example, there is no `From for usize` implementation, because Rust may 29 | /// someday support platforms where usize is smaller than 32 bits. Since we 30 | /// don't care about such platforms, we are happy to provide a `CastFrom 31 | /// for usize` implementation. 32 | /// 33 | /// `CastFrom` should be preferred to the `as` operator, since the `as` operator 34 | /// will silently truncate if the target type is smaller than the source type. 35 | /// When applicable, `CastFrom` should also be preferred to the 36 | /// [`std::convert::TryFrom`] trait, as `TryFrom` will produce a runtime error, 37 | /// while `CastFrom` will produce a compile-time error. 38 | pub trait CastFrom { 39 | /// Performs the cast. 40 | fn cast_from(from: T) -> Self; 41 | } 42 | 43 | macro_rules! cast_from { 44 | ($from:ty, $to:ty) => { 45 | paste::paste! 
{ 46 | impl crate::cast::CastFrom<$from> for $to { 47 | #[allow(clippy::as_conversions)] 48 | #[allow(unused)] 49 | fn cast_from(from: $from) -> $to { 50 | from as $to 51 | } 52 | } 53 | 54 | /// Casts [`$from`] to [`$to`]. 55 | /// 56 | /// This is equivalent to the [`crate::cast::CastFrom`] implementation but is 57 | /// available as a `const fn`. 58 | #[allow(clippy::as_conversions)] 59 | #[allow(unused)] 60 | pub const fn [< $from _to_ $to >](from: $from) -> $to { 61 | from as $to 62 | } 63 | } 64 | }; 65 | } 66 | 67 | #[cfg(target_pointer_width = "32")] 68 | /// Safe casts for 32bit platforms 69 | mod target32 { 70 | // size_of < size_of 71 | cast_from!(u8, usize); 72 | cast_from!(u16, usize); 73 | cast_from!(u8, isize); 74 | cast_from!(i8, isize); 75 | cast_from!(u16, isize); 76 | cast_from!(i16, isize); 77 | 78 | cast_from!(usize, u64); 79 | cast_from!(usize, i64); 80 | cast_from!(usize, u128); 81 | cast_from!(usize, i128); 82 | cast_from!(isize, i64); 83 | cast_from!(isize, i128); 84 | 85 | // size_of == size_of 86 | cast_from!(usize, u32); 87 | cast_from!(isize, i32); 88 | cast_from!(u32, usize); 89 | cast_from!(i32, isize); 90 | } 91 | 92 | #[cfg(target_pointer_width = "64")] 93 | /// Safe casts for 64bit platforms 94 | pub mod target64 { 95 | // size_of < size_of 96 | cast_from!(u8, usize); 97 | cast_from!(u16, usize); 98 | cast_from!(u32, usize); 99 | cast_from!(u8, isize); 100 | cast_from!(i8, isize); 101 | cast_from!(u16, isize); 102 | cast_from!(i16, isize); 103 | cast_from!(u32, isize); 104 | cast_from!(i32, isize); 105 | 106 | cast_from!(usize, u128); 107 | cast_from!(usize, i128); 108 | cast_from!(isize, i128); 109 | 110 | // size_of == size_of 111 | cast_from!(usize, u64); 112 | cast_from!(isize, i64); 113 | cast_from!(u64, usize); 114 | cast_from!(i64, isize); 115 | } 116 | 117 | // TODO(petrosagg): remove these once the std From impls become const 118 | cast_from!(u8, u8); 119 | cast_from!(u8, u16); 120 | cast_from!(u8, i16); 121 | 
cast_from!(u8, u32); 122 | cast_from!(u8, i32); 123 | cast_from!(u8, u64); 124 | cast_from!(u8, i64); 125 | cast_from!(u8, u128); 126 | cast_from!(u8, i128); 127 | cast_from!(u16, u16); 128 | cast_from!(u16, u32); 129 | cast_from!(u16, i32); 130 | cast_from!(u16, u64); 131 | cast_from!(u16, i64); 132 | cast_from!(u16, u128); 133 | cast_from!(u16, i128); 134 | cast_from!(u32, u32); 135 | cast_from!(u32, u64); 136 | cast_from!(u32, i64); 137 | cast_from!(u32, u128); 138 | cast_from!(u32, i128); 139 | cast_from!(u64, u64); 140 | cast_from!(u64, u128); 141 | cast_from!(u64, i128); 142 | cast_from!(i8, i8); 143 | cast_from!(i8, i16); 144 | cast_from!(i8, i32); 145 | cast_from!(i8, i64); 146 | cast_from!(i8, i128); 147 | cast_from!(i16, i16); 148 | cast_from!(i16, i32); 149 | cast_from!(i16, i64); 150 | cast_from!(i16, i128); 151 | cast_from!(i32, i32); 152 | cast_from!(i32, i64); 153 | cast_from!(i32, i128); 154 | cast_from!(i64, i64); 155 | cast_from!(i64, i128); 156 | 157 | /// A trait for attempted casts. 158 | /// 159 | /// `TryCast` is like `as`, but returns `None` if 160 | /// the conversion can't be round-tripped. 161 | /// 162 | /// Note: there may be holes in the domain of `try_cast_from`, 163 | /// which is probably why `TryFrom` wasn't implemented for floats in the 164 | /// standard library. For example, `i64::MAX` can be converted to 165 | /// `f64`, but `i64::MAX - 1` can't. 166 | pub trait TryCastFrom: Sized { 167 | /// Attempts to perform the cast 168 | fn try_cast_from(from: T) -> Option; 169 | } 170 | 171 | /// Implement `TryCastFrom` for the specified types. 172 | /// This is only necessary for types for which `as` exists, 173 | /// but `TryFrom` doesn't (notably floats). 174 | macro_rules! 
try_cast_from { 175 | ($from:ty, $to:ty) => { 176 | impl crate::cast::TryCastFrom<$from> for $to { 177 | #[allow(clippy::as_conversions)] 178 | fn try_cast_from(from: $from) -> Option<$to> { 179 | let to = from as $to; 180 | let inverse = to as $from; 181 | if from == inverse { 182 | Some(to) 183 | } else { 184 | None 185 | } 186 | } 187 | } 188 | }; 189 | } 190 | 191 | try_cast_from!(f64, i64); 192 | try_cast_from!(i64, f64); 193 | try_cast_from!(f64, u64); 194 | try_cast_from!(u64, f64); 195 | 196 | /// A wrapper type which ensures a signed number is non-negative. 197 | #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] 198 | #[repr(transparent)] 199 | pub struct NonNeg(T) 200 | where 201 | T: Signed + fmt::Display; 202 | 203 | impl NonNeg 204 | where 205 | T: Signed + fmt::Display, 206 | { 207 | /// Returns the minimum value of the type. 208 | pub fn min() -> NonNeg { 209 | NonNeg(T::zero()) 210 | } 211 | 212 | /// Returns the maximum value of the type. 213 | pub fn max() -> NonNeg 214 | where 215 | T: UpperBounded, 216 | { 217 | NonNeg(T::max_value()) 218 | } 219 | 220 | /// Attempts to construct a `NonNeg` from its underlying type. 221 | /// 222 | /// Returns an error if `n` is negative. 
223 | pub fn try_from(n: T) -> Result, NonNegError> { 224 | match n.is_negative() { 225 | false => Ok(NonNeg(n)), 226 | true => Err(NonNegError), 227 | } 228 | } 229 | } 230 | 231 | impl fmt::Display for NonNeg 232 | where 233 | T: Signed + fmt::Display, 234 | { 235 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 236 | self.0.fmt(f) 237 | } 238 | } 239 | 240 | impl Deref for NonNeg 241 | where 242 | T: Signed + fmt::Display, 243 | { 244 | type Target = T; 245 | 246 | fn deref(&self) -> &T { 247 | &self.0 248 | } 249 | } 250 | 251 | impl From> for u64 { 252 | fn from(n: NonNeg) -> u64 { 253 | u64::try_from(*n).expect("non-negative") 254 | } 255 | } 256 | 257 | #[cfg(target_pointer_width = "64")] 258 | impl CastFrom> for usize { 259 | #[allow(clippy::as_conversions)] 260 | fn cast_from(from: NonNeg) -> usize { 261 | usize::cast_from(u64::from(from)) 262 | } 263 | } 264 | 265 | /// An error indicating the attempted construction of a `NonNeg` with a negative 266 | /// number. 267 | #[derive(Debug, Clone)] 268 | pub struct NonNegError; 269 | 270 | impl fmt::Display for NonNegError { 271 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 272 | f.write_str("cannot construct NonNeg from negative number") 273 | } 274 | } 275 | 276 | impl Error for NonNegError {} 277 | -------------------------------------------------------------------------------- /util/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod cast; 2 | 3 | use std::collections::BTreeMap; 4 | use std::fmt; 5 | use std::io::BufRead; 6 | use std::io::Write; 7 | use std::path::PathBuf; 8 | use std::time::{Instant, SystemTime, UNIX_EPOCH}; 9 | 10 | use anyhow::bail; 11 | use flate2::write::GzEncoder; 12 | use flate2::Compression; 13 | use prost::Message; 14 | 15 | pub use cast::CastFrom; 16 | pub use cast::TryCastFrom; 17 | 18 | #[cfg(feature = "flamegraph")] 19 | pub use inferno::flamegraph::Options as FlamegraphOptions; 20 | 21 | /// Start times of the 
profiler. 22 | #[derive(Copy, Clone, Debug)] 23 | pub enum ProfStartTime { 24 | Instant(Instant), 25 | TimeImmemorial, 26 | } 27 | 28 | /// Helper struct to simplify building a `string_table` for the pprof format. 29 | #[derive(Default)] 30 | struct StringTable(BTreeMap); 31 | 32 | impl StringTable { 33 | fn new() -> Self { 34 | // Element 0 must always be the emtpy string. 35 | let inner = [("".into(), 0)].into(); 36 | Self(inner) 37 | } 38 | 39 | fn insert(&mut self, s: &str) -> i64 { 40 | if let Some(idx) = self.0.get(s) { 41 | *idx 42 | } else { 43 | let idx = i64::try_from(self.0.len()).expect("must fit"); 44 | self.0.insert(s.into(), idx); 45 | idx 46 | } 47 | } 48 | 49 | fn finish(self) -> Vec { 50 | let mut vec: Vec<_> = self.0.into_iter().collect(); 51 | vec.sort_by_key(|(_, idx)| *idx); 52 | vec.into_iter().map(|(s, _)| s).collect() 53 | } 54 | } 55 | 56 | #[path = "perftools.profiles.rs"] 57 | mod proto; 58 | 59 | /// A single sample in the profile. The stack is a list of addresses. 60 | #[derive(Clone, Debug)] 61 | pub struct WeightedStack { 62 | pub addrs: Vec, 63 | pub weight: f64, 64 | } 65 | 66 | /// A mapping of a single shared object. 67 | #[derive(Clone, Debug)] 68 | pub struct Mapping { 69 | pub memory_start: usize, 70 | pub memory_end: usize, 71 | pub memory_offset: usize, 72 | pub file_offset: u64, 73 | pub pathname: PathBuf, 74 | pub build_id: Option, 75 | } 76 | 77 | /// Build ID of a shared object. 78 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 79 | pub struct BuildId(pub Vec); 80 | 81 | impl fmt::Display for BuildId { 82 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 83 | for byte in &self.0 { 84 | write!(f, "{byte:02x}")?; 85 | } 86 | Ok(()) 87 | } 88 | } 89 | 90 | /// A minimal representation of a profile that can be parsed from the jemalloc heap profile. 91 | #[derive(Default)] 92 | pub struct StackProfile { 93 | pub annotations: Vec, 94 | // The second element is the index in `annotations`, if one exists. 
95 | pub stacks: Vec<(WeightedStack, Option)>, 96 | pub mappings: Vec, 97 | } 98 | 99 | impl StackProfile { 100 | /// Converts the profile into the pprof format. 101 | /// 102 | /// pprof encodes profiles as gzipped protobuf messages of the Profile message type 103 | /// (see `pprof/profile.proto`). 104 | pub fn to_pprof( 105 | &self, 106 | sample_type: (&str, &str), 107 | period_type: (&str, &str), 108 | anno_key: Option, 109 | ) -> Vec { 110 | let profile = self.to_pprof_proto(sample_type, period_type, anno_key); 111 | let encoded = profile.encode_to_vec(); 112 | 113 | let mut gz = GzEncoder::new(Vec::new(), Compression::default()); 114 | gz.write_all(&encoded).unwrap(); 115 | gz.finish().unwrap() 116 | } 117 | 118 | /// Converts the profile into the pprof Protobuf format (see `pprof/profile.proto`). 119 | fn to_pprof_proto( 120 | &self, 121 | sample_type: (&str, &str), 122 | period_type: (&str, &str), 123 | anno_key: Option, 124 | ) -> proto::Profile { 125 | let mut profile = proto::Profile::default(); 126 | let mut strings = StringTable::new(); 127 | 128 | let anno_key = anno_key.unwrap_or_else(|| "annotation".into()); 129 | 130 | profile.sample_type = vec![proto::ValueType { 131 | r#type: strings.insert(sample_type.0), 132 | unit: strings.insert(sample_type.1), 133 | }]; 134 | profile.period_type = Some(proto::ValueType { 135 | r#type: strings.insert(period_type.0), 136 | unit: strings.insert(period_type.1), 137 | }); 138 | 139 | profile.time_nanos = SystemTime::now() 140 | .duration_since(UNIX_EPOCH) 141 | .expect("now is later than UNIX epoch") 142 | .as_nanos() 143 | .try_into() 144 | .expect("the year 2554 is far away"); 145 | 146 | for (mapping, mapping_id) in self.mappings.iter().zip(1..) 
{ 147 | let pathname = mapping.pathname.to_string_lossy(); 148 | let filename_idx = strings.insert(&pathname); 149 | 150 | let build_id_idx = match &mapping.build_id { 151 | Some(build_id) => strings.insert(&build_id.to_string()), 152 | None => 0, 153 | }; 154 | 155 | profile.mapping.push(proto::Mapping { 156 | id: mapping_id, 157 | memory_start: u64::cast_from(mapping.memory_start), 158 | memory_limit: u64::cast_from(mapping.memory_end), 159 | file_offset: mapping.file_offset, 160 | filename: filename_idx, 161 | build_id: build_id_idx, 162 | ..Default::default() 163 | }); 164 | 165 | // This is a is a Polar Signals-specific extension: For correct offline symbolization 166 | // they need access to the memory offset of mappings, but the pprof format only has a 167 | // field for the file offset. So we instead encode additional information about 168 | // mappings in magic comments. There must be exactly one comment for each mapping. 169 | 170 | // Take a shortcut and assume the ELF type is always `ET_DYN`. This is true for shared 171 | // libraries and for position-independent executable, so it should always be true for 172 | // any mappings we have. 173 | // Getting the actual information is annoying. It's in the ELF header (the `e_type` 174 | // field), but there is no guarantee that the full ELF header gets mapped, so we might 175 | // not be able to find it in memory. We could try to load it from disk instead, but 176 | // then we'd have to worry about blocking disk I/O. 
177 | let elf_type = 3; 178 | 179 | let comment = format!( 180 | "executableInfo={:x};{:x};{:x}", 181 | elf_type, mapping.file_offset, mapping.memory_offset 182 | ); 183 | profile.comment.push(strings.insert(&comment)); 184 | } 185 | 186 | let mut location_ids = BTreeMap::new(); 187 | #[cfg(feature = "symbolize")] 188 | let mut function_ids = BTreeMap::new(); 189 | for (stack, anno) in self.iter() { 190 | let mut sample = proto::Sample::default(); 191 | 192 | let value = stack.weight.trunc(); 193 | let value = i64::try_cast_from(value).expect("no exabyte heap sizes"); 194 | sample.value.push(value); 195 | 196 | for addr in stack.addrs.iter().rev() { 197 | // See the comment 198 | // [here](https://github.com/rust-lang/backtrace-rs/blob/036d4909e1fb9c08c2bb0f59ac81994e39489b2f/src/symbolize/mod.rs#L123-L147) 199 | // for why we need to subtract one. tl;dr addresses 200 | // in stack traces are actually the return address of 201 | // the called function, which is one past the call 202 | // itself. 203 | // 204 | // Of course, the `call` instruction can be more than one byte, so after subtracting 205 | // one, we might point somewhere in the middle of it, rather 206 | // than to the beginning of the instruction. That's fine; symbolization 207 | // tools don't seem to get confused by this. 208 | let addr = u64::cast_from(*addr) - 1; 209 | 210 | let loc_id = *location_ids.entry(addr).or_insert_with(|| { 211 | // profile.proto says the location id may be the address, but Polar Signals 212 | // insists that location ids are sequential, starting with 1. 213 | let id = u64::cast_from(profile.location.len()) + 1; 214 | 215 | #[allow(unused_mut)] // for feature = "symbolize" 216 | let mut mapping = profile 217 | .mapping 218 | .iter_mut() 219 | .find(|m| m.memory_start <= addr && m.memory_limit > addr); 220 | 221 | // If online symbolization is enabled, resolve the function and line. 
222 | #[allow(unused_mut)] 223 | let mut line = Vec::new(); 224 | #[cfg(feature = "symbolize")] 225 | backtrace::resolve(addr as *mut std::ffi::c_void, |symbol| { 226 | let Some(symbol_name) = symbol.name() else { 227 | return; 228 | }; 229 | let function_name = format!("{symbol_name:#}"); 230 | let lineno = symbol.lineno().unwrap_or(0) as i64; 231 | 232 | let function_id = *function_ids.entry(function_name).or_insert_with_key( 233 | |function_name| { 234 | let function_id = profile.function.len() as u64 + 1; 235 | let system_name = String::from_utf8_lossy(symbol_name.as_bytes()); 236 | let filename = symbol 237 | .filename() 238 | .map(|path| path.to_string_lossy()) 239 | .unwrap_or(std::borrow::Cow::Borrowed("")); 240 | 241 | if let Some(ref mut mapping) = mapping { 242 | mapping.has_functions = true; 243 | mapping.has_filenames |= !filename.is_empty(); 244 | mapping.has_line_numbers |= lineno > 0; 245 | } 246 | 247 | profile.function.push(proto::Function { 248 | id: function_id, 249 | name: strings.insert(function_name), 250 | system_name: strings.insert(&system_name), 251 | filename: strings.insert(&filename), 252 | ..Default::default() 253 | }); 254 | function_id 255 | }, 256 | ); 257 | 258 | line.push(proto::Line { 259 | function_id, 260 | line: lineno, 261 | }); 262 | 263 | if let Some(ref mut mapping) = mapping { 264 | mapping.has_inline_frames |= line.len() > 1; 265 | } 266 | }); 267 | 268 | profile.location.push(proto::Location { 269 | id, 270 | mapping_id: mapping.map_or(0, |m| m.id), 271 | address: addr, 272 | line, 273 | ..Default::default() 274 | }); 275 | id 276 | }); 277 | 278 | sample.location_id.push(loc_id); 279 | 280 | if let Some(anno) = anno { 281 | sample.label.push(proto::Label { 282 | key: strings.insert(&anno_key), 283 | str: strings.insert(anno), 284 | ..Default::default() 285 | }) 286 | } 287 | } 288 | 289 | profile.sample.push(sample); 290 | } 291 | 292 | profile.string_table = strings.finish(); 293 | 294 | profile 295 | } 296 | 297 | 
/// Converts the profile into a flamegraph SVG, using the given options. 298 | #[cfg(feature = "flamegraph")] 299 | pub fn to_flamegraph(&self, opts: &mut FlamegraphOptions) -> anyhow::Result> { 300 | use std::collections::HashMap; 301 | 302 | // We start from a symbolized Protobuf profile. We just pass in empty type names, since 303 | // they're not used in the final flamegraph. 304 | let profile = self.to_pprof_proto(("", ""), ("", ""), None); 305 | 306 | // Index locations, functions, and strings. 307 | let locations: HashMap = 308 | profile.location.into_iter().map(|l| (l.id, l)).collect(); 309 | let functions: HashMap = 310 | profile.function.into_iter().map(|f| (f.id, f)).collect(); 311 | let strings = profile.string_table; 312 | 313 | // Resolve stacks as function name vectors, and sum sample values per stack. Also reverse 314 | // the stack, since inferno expects it bottom-up. 315 | let mut stacks: HashMap, i64> = HashMap::new(); 316 | for sample in profile.sample { 317 | let mut stack = Vec::with_capacity(sample.location_id.len()); 318 | for location in sample.location_id.into_iter().rev() { 319 | let location = locations.get(&location).expect("missing location"); 320 | for line in location.line.iter().rev() { 321 | let function = functions.get(&line.function_id).expect("missing function"); 322 | let name = strings.get(function.name as usize).expect("missing string"); 323 | stack.push(name.as_str()); 324 | } 325 | } 326 | let value = sample.value.first().expect("missing value"); 327 | *stacks.entry(stack).or_default() += value; 328 | } 329 | 330 | // Construct stack lines for inferno. 331 | let mut lines = stacks 332 | .into_iter() 333 | .map(|(stack, value)| format!("{} {}", stack.join(";"), value)) 334 | .collect::>(); 335 | lines.sort(); 336 | 337 | // Generate the flamegraph SVG. 
338 | let mut bytes = Vec::new(); 339 | let lines = lines.iter().map(|line| line.as_str()); 340 | inferno::flamegraph::from_lines(opts, lines, &mut bytes)?; 341 | Ok(bytes) 342 | } 343 | } 344 | 345 | pub struct StackProfileIter<'a> { 346 | inner: &'a StackProfile, 347 | idx: usize, 348 | } 349 | 350 | impl<'a> Iterator for StackProfileIter<'a> { 351 | type Item = (&'a WeightedStack, Option<&'a str>); 352 | 353 | fn next(&mut self) -> Option { 354 | let (stack, anno) = self.inner.stacks.get(self.idx)?; 355 | self.idx += 1; 356 | let anno = anno.map(|idx| self.inner.annotations.get(idx).unwrap().as_str()); 357 | Some((stack, anno)) 358 | } 359 | } 360 | 361 | impl StackProfile { 362 | pub fn push_stack(&mut self, stack: WeightedStack, annotation: Option<&str>) { 363 | let anno_idx = if let Some(annotation) = annotation { 364 | Some( 365 | self.annotations 366 | .iter() 367 | .position(|anno| annotation == anno.as_str()) 368 | .unwrap_or_else(|| { 369 | self.annotations.push(annotation.to_string()); 370 | self.annotations.len() - 1 371 | }), 372 | ) 373 | } else { 374 | None 375 | }; 376 | self.stacks.push((stack, anno_idx)) 377 | } 378 | 379 | pub fn push_mapping(&mut self, mapping: Mapping) { 380 | self.mappings.push(mapping); 381 | } 382 | 383 | pub fn iter(&self) -> StackProfileIter<'_> { 384 | StackProfileIter { 385 | inner: self, 386 | idx: 0, 387 | } 388 | } 389 | } 390 | 391 | /// Parse a jemalloc profile file, producing a vector of stack traces along with their weights. 392 | pub fn parse_jeheap( 393 | r: R, 394 | mappings: Option<&[Mapping]>, 395 | ) -> anyhow::Result { 396 | let mut cur_stack = None; 397 | let mut profile = StackProfile::default(); 398 | let mut lines = r.lines(); 399 | 400 | let first_line = match lines.next() { 401 | Some(s) => s?, 402 | None => bail!("Heap dump file was empty"), 403 | }; 404 | // The first line of the file should be e.g. 
"heap_v2/524288", where the trailing 405 | // number is the inverse probability of a byte being sampled. 406 | let sampling_rate: f64 = str::parse(first_line.trim_start_matches("heap_v2/"))?; 407 | 408 | for line in &mut lines { 409 | let line = line?; 410 | let line = line.trim(); 411 | 412 | let words: Vec<_> = line.split_ascii_whitespace().collect(); 413 | if !words.is_empty() && words[0] == "@" { 414 | if cur_stack.is_some() { 415 | bail!("Stack without corresponding weight!") 416 | } 417 | let mut addrs = words[1..] 418 | .iter() 419 | .map(|w| { 420 | let raw = w.trim_start_matches("0x"); 421 | usize::from_str_radix(raw, 16) 422 | }) 423 | .collect::, _>>()?; 424 | addrs.reverse(); 425 | cur_stack = Some(addrs); 426 | } 427 | if words.len() > 2 && words[0] == "t*:" { 428 | if let Some(addrs) = cur_stack.take() { 429 | // The format here is e.g.: 430 | // t*: 40274: 2822125696 [0: 0] 431 | // 432 | // "t*" means summary across all threads; someday we will support per-thread dumps but don't now. 433 | // "40274" is the number of sampled allocations (`n_objs` here). 434 | // On all released versions of jemalloc, "2822125696" is the total number of bytes in those allocations. 435 | // 436 | // To get the predicted number of total bytes from the sample, we need to un-bias it by following the logic in 437 | // jeprof's `AdjustSamples`: https://github.com/jemalloc/jemalloc/blob/498f47e1ec83431426cdff256c23eceade41b4ef/bin/jeprof.in#L4064-L4074 438 | // 439 | // However, this algorithm is actually wrong: you actually need to unbias each sample _before_ you add them together, rather 440 | // than adding them together first and then unbiasing the average allocation size. But the heap profile format in released versions of jemalloc 441 | // does not give us access to each individual allocation, so this is the best we can do (and `jeprof` does the same). 
442 | // 443 | // It usually seems to be at least close enough to being correct to be useful, but could be very wrong if for the same stack, there is a 444 | // very large amount of variance in the amount of bytes allocated (e.g., if there is one allocation of 8 MB and 1,000,000 of 8 bytes) 445 | // 446 | // In the latest unreleased jemalloc sources from github, the issue is worked around by unbiasing the numbers for each sampled allocation, 447 | // and then fudging them to maintain compatibility with jeprof's logic. So, once those are released and we start using them, 448 | // this will become even more correct. 449 | // 450 | // For more details, see this doc: https://github.com/jemalloc/jemalloc/pull/1902 451 | // 452 | // And this gitter conversation between me (Brennan Vincent) and David Goldblatt: https://gitter.im/jemalloc/jemalloc?at=5f31b673811d3571b3bb9b6b 453 | let n_objs: f64 = str::parse(words[1].trim_end_matches(':'))?; 454 | let bytes_in_sampled_objs: f64 = str::parse(words[2])?; 455 | let ratio = (bytes_in_sampled_objs / n_objs) / sampling_rate; 456 | let scale_factor = 1.0 / (1.0 - (-ratio).exp()); 457 | let weight = bytes_in_sampled_objs * scale_factor; 458 | profile.push_stack(WeightedStack { addrs, weight }, None); 459 | } 460 | } 461 | } 462 | if cur_stack.is_some() { 463 | bail!("Stack without corresponding weight!"); 464 | } 465 | 466 | if let Some(mappings) = mappings { 467 | for mapping in mappings { 468 | profile.push_mapping(mapping.clone()); 469 | } 470 | } 471 | 472 | Ok(profile) 473 | } 474 | -------------------------------------------------------------------------------- /util/src/perftools.profiles.rs: -------------------------------------------------------------------------------- 1 | // @generated 2 | #[allow(clippy::derive_partial_eq_without_eq)] 3 | #[derive(Clone, PartialEq, ::prost::Message)] 4 | pub struct Profile { 5 | /// A description of the samples associated with each Sample.value.
6 | /// For a cpu profile this might be: 7 | /// \[["cpu","nanoseconds"]\] or \[["wall","seconds"]\] or \[["syscall","count"]\] 8 | /// For a heap profile, this might be: 9 | /// \[["allocations","count"\], \["space","bytes"]\], 10 | /// If one of the values represents the number of events represented 11 | /// by the sample, by convention it should be at index 0 and use 12 | /// sample_type.unit == "count". 13 | #[prost(message, repeated, tag = "1")] 14 | pub sample_type: ::prost::alloc::vec::Vec<ValueType>, 15 | /// The set of samples recorded in this profile. 16 | #[prost(message, repeated, tag = "2")] 17 | pub sample: ::prost::alloc::vec::Vec<Sample>, 18 | /// Mapping from address ranges to the image/binary/library mapped 19 | /// into that address range. mapping\[0\] will be the main binary. 20 | #[prost(message, repeated, tag = "3")] 21 | pub mapping: ::prost::alloc::vec::Vec<Mapping>, 22 | /// Useful program location 23 | #[prost(message, repeated, tag = "4")] 24 | pub location: ::prost::alloc::vec::Vec<Location>, 25 | /// Functions referenced by locations 26 | #[prost(message, repeated, tag = "5")] 27 | pub function: ::prost::alloc::vec::Vec<Function>, 28 | /// A common table for strings referenced by various messages. 29 | /// string_table\[0\] must always be "". 30 | #[prost(string, repeated, tag = "6")] 31 | pub string_table: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, 32 | /// frames with Function.function_name fully matching the following 33 | /// regexp will be dropped from the samples, along with their successors. 34 | /// 35 | /// Index into string table. 36 | #[prost(int64, tag = "7")] 37 | pub drop_frames: i64, 38 | /// frames with Function.function_name fully matching the following 39 | /// regexp will be kept, even if it matches drop_frames. 40 | /// 41 | /// Index into string table. 42 | #[prost(int64, tag = "8")] 43 | pub keep_frames: i64, 44 | // The following fields are informational, do not affect 45 | // interpretation of results.
46 | /// Time of collection (UTC) represented as nanoseconds past the epoch. 47 | #[prost(int64, tag = "9")] 48 | pub time_nanos: i64, 49 | /// Duration of the profile, if a duration makes sense. 50 | #[prost(int64, tag = "10")] 51 | pub duration_nanos: i64, 52 | /// The kind of events between sampled occurrences. 53 | /// e.g [ "cpu","cycles" ] or [ "heap","bytes" ] 54 | #[prost(message, optional, tag = "11")] 55 | pub period_type: ::core::option::Option<ValueType>, 56 | /// The number of events between sampled occurrences. 57 | #[prost(int64, tag = "12")] 58 | pub period: i64, 59 | /// Freeform text associated to the profile. 60 | /// 61 | /// Indices into string table. 62 | #[prost(int64, repeated, tag = "13")] 63 | pub comment: ::prost::alloc::vec::Vec<i64>, 64 | /// Index into the string table of the type of the preferred sample 65 | /// value. If unset, clients should default to the last sample value. 66 | #[prost(int64, tag = "14")] 67 | pub default_sample_type: i64, 68 | } 69 | /// ValueType describes the semantics and measurement units of a value. 70 | #[allow(clippy::derive_partial_eq_without_eq)] 71 | #[derive(Clone, PartialEq, ::prost::Message)] 72 | pub struct ValueType { 73 | /// Index into string table. 74 | #[prost(int64, tag = "1")] 75 | pub r#type: i64, 76 | /// Index into string table. 77 | #[prost(int64, tag = "2")] 78 | pub unit: i64, 79 | } 80 | /// Each Sample records values encountered in some program 81 | /// context. The program context is typically a stack trace, perhaps 82 | /// augmented with auxiliary information like the thread-id, some 83 | /// indicator of a higher level request being handled etc. 84 | #[allow(clippy::derive_partial_eq_without_eq)] 85 | #[derive(Clone, PartialEq, ::prost::Message)] 86 | pub struct Sample { 87 | /// The ids recorded here correspond to a Profile.location.id. 88 | /// The leaf is at location_id\[0\].
89 | #[prost(uint64, repeated, tag = "1")] 90 | pub location_id: ::prost::alloc::vec::Vec, 91 | /// The type and unit of each value is defined by the corresponding 92 | /// entry in Profile.sample_type. All samples must have the same 93 | /// number of values, the same as the length of Profile.sample_type. 94 | /// When aggregating multiple samples into a single sample, the 95 | /// result has a list of values that is the element-wise sum of the 96 | /// lists of the originals. 97 | #[prost(int64, repeated, tag = "2")] 98 | pub value: ::prost::alloc::vec::Vec, 99 | /// label includes additional context for this sample. It can include 100 | /// things like a thread id, allocation size, etc 101 | #[prost(message, repeated, tag = "3")] 102 | pub label: ::prost::alloc::vec::Vec