├── .clang-format ├── .clippy.toml ├── .dockerignore ├── .gitattributes ├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── build.rs ├── python.gdbinit ├── scripts └── download_and_build_static_libs.sh ├── src ├── arch.rs ├── bindings │ └── mod.rs ├── bpf │ ├── .gitignore │ ├── basic_types.h │ ├── mod.rs │ ├── pyperf.bpf.c │ ├── pyperf.h │ ├── pyperf.rs │ └── vmlinux.h ├── lib.rs ├── main.rs ├── perf_event.rs ├── process_info.rs ├── profile.rs ├── py_perf.rs ├── python_readers.rs └── python_versions │ ├── mod.rs │ ├── python_2_7_15.yaml │ ├── python_3_10_0.yaml │ ├── python_3_11_0.yaml │ ├── python_3_3_7.yaml │ ├── python_3_5_5.yaml │ ├── python_3_6_6.yaml │ ├── python_3_7_0.yaml │ ├── python_3_8_0.yaml │ └── python_3_9_5.yaml └── xtask ├── Cargo.toml ├── README.md └── src └── main.rs /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | AllowShortIfStatementsOnASingleLine: false 4 | AllowShortLoopsOnASingleLine: false 5 | ColumnLimit: 120 6 | IndentWidth: 4 7 | SortIncludes: false 8 | -------------------------------------------------------------------------------- /.clippy.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kakkoyun/py-perf/cc29351f7692f69df50125ce3e40e8d75bbadddd/.clippy.toml -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | src/bpf/vmlinux.h linguist-generated 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: 
-------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | name: Build 12 | runs-on: ubuntu-22.04 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | rust: [stable] 17 | steps: 18 | - uses: actions/checkout@v3 19 | - uses: dtolnay/rust-toolchain@1.71.0 20 | with: 21 | toolchain: ${{matrix.rust}} 22 | components: rust-src, rustfmt 23 | - name: Install build system dependencies 24 | run: | 25 | export DEBIAN_FRONTEND=noninteractive 26 | sudo apt-get -y install --no-install-recommends \ 27 | curl \ 28 | ca-certificates \ 29 | clang \ 30 | make \ 31 | pkg-config \ 32 | libelf-dev \ 33 | zlib1g-dev 34 | - name: Build 35 | run: | 36 | export RUSTFLAGS='-L /usr/lib/x86_64-linux-gnu' 37 | cargo build 38 | - name: Static build 39 | run: | 40 | export RUSTFLAGS='-L /usr/lib/x86_64-linux-gnu -C target-feature=+crt-static' 41 | cargo build --target x86_64-unknown-linux-gnu 42 | 43 | lint: 44 | name: Lint 45 | runs-on: ubuntu-22.04 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | rust: [stable] 50 | steps: 51 | - uses: actions/checkout@v3 52 | - uses: dtolnay/rust-toolchain@1.71.0 53 | with: 54 | toolchain: ${{matrix.rust}} 55 | components: rust-src, rustfmt 56 | - name: Run cargo fmt 57 | run: | 58 | # These files are generated at build time, so some rustfmt versions 59 | # fail with Error writing files: failed to resolve mod `bpf` if it 60 | # does not exist 61 | touch src/bpf/py-perf.rs 62 | touch src/bpf/features.rs 63 | cargo fmt 64 | git diff --exit-code 65 | 66 | clippy: 67 | name: Clippy 68 | runs-on: ubuntu-22.04 69 | steps: 70 | - uses: actions/checkout@v3 71 | - uses: dtolnay/rust-toolchain@1.71.0 72 | with: 73 | components: rust-src, clippy, rustfmt 74 | - name: Install build system dependencies 75 | run: | 76 | export DEBIAN_FRONTEND=noninteractive 77 | sudo apt-get -y install --no-install-recommends \ 78 | 
curl \ 79 | ca-certificates \ 80 | clang \ 81 | make \ 82 | pkg-config \ 83 | libelf-dev \ 84 | zlib1g-dev 85 | - name: Run clippy 86 | run: | 87 | export RUSTFLAGS='-L /usr/lib/x86_64-linux-gnu' 88 | cargo clippy -- -Dclippy::all 89 | 90 | test: 91 | name: Test 92 | runs-on: ubuntu-22.04 93 | strategy: 94 | fail-fast: false 95 | matrix: 96 | rust: [stable] 97 | steps: 98 | - uses: actions/checkout@v3 99 | - uses: dtolnay/rust-toolchain@1.71.0 100 | with: 101 | toolchain: ${{matrix.rust}} 102 | components: rust-src, rustfmt 103 | - name: Install build system dependencies 104 | run: | 105 | export DEBIAN_FRONTEND=noninteractive 106 | sudo apt-get -y install --no-install-recommends \ 107 | curl \ 108 | ca-certificates \ 109 | clang \ 110 | make \ 111 | pkg-config \ 112 | libelf-dev \ 113 | zlib1g-dev 114 | - name: Run unittests 115 | run: | 116 | export RUSTFLAGS='-L /usr/lib/x86_64-linux-gnu' 117 | export RUST_BACKTRACE=1 118 | cargo test -- --skip py-perf::tests 119 | - name: Install podman 120 | run: sudo apt-get -y install --no-install-recommends podman 121 | - name: Pull Ruby containers 122 | run: tools/pull_ruby_images 123 | - name: Run integration tests 124 | run: | 125 | export RUSTFLAGS='-L /usr/lib/x86_64-linux-gnu' 126 | export RUST_BACKTRACE=1 127 | # Running only 3.1.2 for a bit, will enable the rest once we make sure 128 | # that things are looking good 129 | cargo test -- py-perf::tests::py-perf_test_3_1_2 --nocapture 130 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | 
**/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | # Added by cargo 17 | 18 | /target 19 | 20 | src/bpf/py-perf.rs 21 | 22 | py-perf_out* 23 | py-perf_flame* 24 | 25 | TODO.md 26 | 27 | /tmp 28 | py-perf_*.pb 29 | py-perf_*.pb.gz 30 | py-perf_*.txt 31 | py-perf_*.svg 32 | py-perf_*.json 33 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "py-perf" 3 | description = "A Proof-of-concept low-overhead sampling CPU profiler written in Rust for Python implemented using eBPF." 4 | version = "0.1.0" 5 | edition = "2021" 6 | repository = "https://github.com/kakkoyun/py-perf" 7 | authors = ["Kemal Akkoyun "] 8 | keywords = ["bpf", "ebpf", "python", "CPython", "profiler"] 9 | license = "Apache-2.0" 10 | categories = ["development-tools", "profiling", "performance"] 11 | readme = "README.md" 12 | 13 | [profile.release] 14 | lto = true 15 | 16 | [dependencies] 17 | anyhow = { version = "1.0", features = ["backtrace"] } 18 | chrono = "0.4" 19 | clap = { version = "4.3", features = ["derive"] } 20 | crossbeam = "0.8.2" 21 | ctrlc = "3.4" 22 | env_logger = "0.10" 23 | errno = "0.3" 24 | goblin = "0.7" 25 | humantime = "2" 26 | inferno = "0.11" 27 | libbpf-rs = { version = "0.21", features = ["static"] } 28 | libc = "0.2" 29 | log = "0.4" 30 | nix = "0.26" 31 | num_cpus = "1.16" 32 | perf-event-open-sys = "4.0" 33 | plain = "0.2.3" 34 | 35 | # TODO(kakkoyun): Send a patch to upstream. 36 | pprof = { git = "ssh://git@github.com/kakkoyun/pprof-rs.git", features = [ 37 | # pprof = { path = "../../Sandbox/Profiling/pprof-rs", features = [ 38 | "flamegraph", 39 | "inferno", 40 | "protobuf", 41 | "protobuf-codec" 42 | ] } 43 | proc-maps = "0.3" 44 | # TODO(kakkoyun): Send a patch to upstream. 
45 | py-spy = { git = "ssh://git@github.com/kakkoyun/py-spy.git" } 46 | # py-spy = { path = "../../Sandbox/Profilers/py-spy" } 47 | remoteprocess = { version = "0.4.12", features = ["unwind"] } 48 | serde = { version = "1.0", features = ["derive"] } 49 | serde_json = "1.0" 50 | serde_yaml = "0.9" 51 | thiserror = "1.0" 52 | time = { version = "0.3.24", features = [ 53 | "formatting", 54 | "local-offset", 55 | "macros" 56 | ] } 57 | 58 | [build-dependencies] 59 | bindgen = "0.66" 60 | libbpf-cargo = "0.21" 61 | 62 | [workspace] 63 | members = [".", "xtask"] 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # TODO(kakkoyun): DRY this file. 
2 | 3 | target/static-libs: 4 | @echo "Building static-libs" 5 | ./scripts/download_and_build_static_libs.sh 6 | 7 | target/debug/deps/libelf.a: target/static-libs 8 | mkdir -p target/debug/deps 9 | cp target/static-libs/libelf.a target/debug/deps/libelf.a 10 | 11 | target/release/deps/libelf.a: target/static-libs 12 | mkdir -p target/release/deps 13 | cp target/static-libs/libelf.a target/release/deps/libelf.a 14 | 15 | target/debug/deps/libz.a: target/static-libs 16 | mkdir -p target/debug/deps 17 | cp target/static-libs/libz.a target/debug/deps/libz.a 18 | 19 | target/release/deps/libz.a: target/static-libs 20 | mkdir -p target/release/deps 21 | cp target/static-libs/libz.a target/release/deps/libz.a 22 | 23 | deps: target/debug/deps/libelf.a target/debug/deps/libz.a target/release/deps/libelf.a target/release/deps/libz.a 24 | mkdir -p out/ruby_versions 25 | mkdir -p out/python_versions 26 | 27 | .PHONY: build 28 | build: target/debug/deps/libelf.a target/debug/deps/libz.a 29 | cargo build 30 | 31 | .PHONY: release-build 32 | release-build: target/release/deps/libelf.a target/release/deps/libz.a 33 | cargo build --release 34 | 35 | .PHONY: clean 36 | clean: 37 | rm -rf target 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![wakatime](https://wakatime.com/badge/user/c03c2c3a-0328-4e74-ba79-1ce0eb43a4f8/project/6de0edd3-d3d9-48b1-8f9e-e019fc7b42f1.svg)](https://wakatime.com/badge/user/c03c2c3a-0328-4e74-ba79-1ce0eb43a4f8/project/6de0edd3-d3d9-48b1-8f9e-e019fc7b42f1) 2 | 3 | # py-perf 4 | 5 | A Proof-of-concept low-overhead sampling CPU profiler written in Rust for Python implemented using eBPF. 6 | It is heavily "influenced" by [rbperf](https://github.com/javierhonduco/rbperf) and [py-spy](https://github.com/benfred/py-spy). 
7 | 8 | > [!WARNING] 9 | > IT IS NOT READY FOR PRODUCTION USE AND IT IS NOT INTENDED TO BE A REPLACEMENT FOR EXISTING TOOLS. 10 | > It is still considered experimental. Please feel free to contribute or provide feedback! 11 | 12 | > [!IMPORTANT] 13 | > If you are looking for a production-ready tool, please check out [parca-agent](https://github.com/parca-dev/parca-agent) instead. 14 | 15 | > [!NOTE] 16 | > If you are curious how this tool works under the hood, check out the [blog post](https://www.polarsignals.com/blog/posts/2023/10/04/profiling-python-and-ruby-with-ebpf)! 17 | 18 | ## Features 19 | 20 | The main goals for `py-perf` are: 21 | 22 | - On-CPU profiling support 23 | - Low overhead 24 | - Profiled processes don't have to be restarted or modified in any way 25 | 26 | ## Installation 27 | 28 | The latest release is available [here](https://github.com/kakkoyun/py-perf/releases/latest). 29 | 30 | ## Usage 31 | 32 | ### CPU sampling 33 | 34 | ```shell 35 | sudo py-perf record --pid `pidof python` cpu 36 | ``` 37 | 38 | Some debug information will be printed, and a flame graph called `py-perf_flame_$date` will be written to disk 🎉 39 | 40 | ## Supported Python versions 41 | 42 | The currently supported Python (CPython) versions: 43 | 44 | - **2.7**: 2.7.x 45 | - **3.x**: 3.3.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x 46 | 47 | ## Supported kernels 48 | 49 | Linux kernel 4.18 is the minimum required version but 5.x or later is recommended. 50 | 51 | ## Building 52 | 53 | To build `py-perf` you need a modern Linux machine with: 54 | 55 | - The Rust toolchain 56 | - `clang` to compile the BPF code 57 | - `elfutils` and `zlib` installed 58 | - `make` and `pkg-config` to build libbpf 59 | 60 | Once the dependencies are installed: 61 | 62 | ```shell 63 | # As we are statically linking elfutils and zlib, we have to tell rustc 64 | # where they are located.
On my Ubuntu system they are under 65 | $ export RUSTFLAGS='-L /usr/lib/x86_64-linux-gnu' 66 | $ cargo build [--release] 67 | ``` 68 | 69 | The built binary can be found under `target/(debug|release)/py-perf`. 70 | 71 | ## Developing and troubleshooting 72 | 73 | Debug logs can be enabled with `RUST_LOG=debug`. The info subcommand, `py-perf info` shows the supported BPF features as well as other supported details. 74 | 75 | ## Stability 76 | 77 | `py-perf` is in active development and the CLI and APIs might change any time. 78 | 79 | ## Bugs 80 | 81 | If you encounter any bugs, feel free to open an issue on py-perf's [repo](https://github.com/kakkoyun/py-perf). 82 | 83 | ## Acknowledgments 84 | 85 | `py-perf` wouldn't be possible without all the open-source projects that we benefit from, such as [Rust](https://github.com/rust-lang), [rbperf](https://github.com/javierhonduco/rbperf), [py-spy](https://github.com/benfred/py-spy) and all the superb crates we use in this project, Python, the BPF ecosystem, and many others! 86 | 87 | ## License 88 | 89 | User-space code: Apache 2 90 | 91 | Kernel-space code (eBPF profiler): GNU General Public License, version 2 92 | 93 | #### TODO 94 | 95 | - TODO(kakkoyun): Add sections from parca-agent! 96 | - TODO(kakkoyun): Add reference to bcc, bcc/granulate and linux/tool examples from facebook. 97 | 98 | ## Features 99 | 100 | - Supports profiling Python processes running in Docker containers. Tested using official Python 101 | Docker images (`python:X.Y`). 102 | - Supports glibc- and musl-based environments. 103 | - Supports Python compiled in both PIE and non-PIE configurations. 104 | - Supports Python running standalone and as a library (linked with `libpythonX.Y`). 105 | 106 | ## Limitations 107 | 108 | - Architecture: x86_64. 109 | - Linux kernel version: oldest version tested is 4.14. Versions 4.11-4.14 may work. Required for 110 | `bpf_probe_read_str`. 111 | - BCC version: using BCC nightly is recommended. 
v0.17 is known to work. 112 | - Clang/LLVM: at least version 9. 113 | 114 | ## Overview 115 | 116 | PyPerf uses Linux's perf events subsystem to gather stack samples of running Python interpreters at 117 | a constant interval. Instead of capturing native execution stacks, PyPerf reads the information 118 | stored by the Python interpreter regarding the current state of execution. Unlike many existing 119 | tools, however, the memory of the process is read from a kernel context. The advantages of this 120 | approach are mainly reduced system overhead and no interference with the program being profiled. 121 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | extern crate bindgen; 2 | 3 | use std::env; 4 | use std::path::PathBuf; 5 | 6 | use bindgen::callbacks::{DeriveInfo, ParseCallbacks}; 7 | use libbpf_cargo::{Error, SkeletonBuilder}; 8 | use std::fs::File; 9 | use std::io::Read; 10 | use std::io::Write; 11 | use std::path::Path; 12 | 13 | const PYTHON_STACK_SOURCE: &str = "./src/bpf/pyperf.bpf.c"; 14 | const PYTHON_STACK_HEADER: &str = "./src/bpf/pyperf.h"; 15 | const PYTHON_STACK_SKELETON: &str = "./src/bpf/pyperf.rs"; 16 | 17 | #[derive(Debug)] 18 | struct BuildCallbacks; 19 | 20 | impl ParseCallbacks for BuildCallbacks { 21 | fn add_derives(&self, derive_info: &DeriveInfo) -> Vec<String> { 22 | if derive_info.name == "PythonVersionOffsets" || derive_info.name.starts_with("Py") { 23 | vec![ 24 | "Serialize".into(), 25 | "Deserialize".into(), 26 | "PartialEq".into(), 27 | "Eq".into(), 28 | "Hash".into(), 29 | ] 30 | } else if derive_info.name == "Stack" { 31 | vec!["PartialEq".into(), "Eq".into()] 32 | } else { 33 | vec![] 34 | } 35 | } 36 | 37 | // Copied from bindgen::CargoCallbacks, to tell cargo to invalidate 38 | // the built crate whenever any of the included header files changed.
39 | fn include_file(&self, filename: &str) { 40 | println!("cargo:rerun-if-changed={filename}"); 41 | } 42 | } 43 | 44 | fn main() { 45 | // The bindgen::Builder is the main entry point 46 | // to bindgen, and lets you build up options for 47 | // the resulting bindings. 48 | let bindings = bindgen::Builder::default() 49 | // The input header we would like to generate 50 | // bindings for. 51 | .header(PYTHON_STACK_HEADER) 52 | .derive_default(true) 53 | .parse_callbacks(Box::new(BuildCallbacks)) 54 | // Finish the builder and generate the bindings. 55 | .generate() 56 | // Unwrap the Result and panic on failure. 57 | .expect("Unable to generate bindings"); 58 | 59 | // Write the bindings to the $OUT_DIR/bindings.rs file. 60 | let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); 61 | let bindings_out_file = out_path.join("bindings.rs"); 62 | bindings 63 | .write_to_file(&bindings_out_file) 64 | .expect("Couldn't write bindings!"); 65 | 66 | // Add Serde includes. 67 | let mut contents = String::new(); 68 | File::open(&bindings_out_file) 69 | .unwrap() 70 | .read_to_string(&mut contents) 71 | .unwrap(); 72 | let new_contents = format!("use serde::{{Serialize, Deserialize}};\n{contents}"); 73 | File::create(&bindings_out_file) 74 | .unwrap() 75 | .write_all(new_contents.as_bytes()) 76 | .unwrap(); 77 | 78 | let skel = Path::new(PYTHON_STACK_SKELETON); 79 | match SkeletonBuilder::new() 80 | .source(PYTHON_STACK_SOURCE) 81 | .clang_args("-Wextra -Wall -Werror") 82 | .debug(true) 83 | .build_and_generate(skel) 84 | { 85 | Ok(_) => {} 86 | Err(err) => match err { 87 | Error::Build(msg) => { 88 | panic!("Error running SkeletonBuilder for py-perf = Build: {msg:?}"); 89 | } 90 | Error::Generate(msg) => { 91 | panic!("Error running SkeletonBuilder for py-perf = Generate: {msg:?}"); 92 | } 93 | }, 94 | } 95 | 96 | // Turn off some clippy warnings in the generated BPF skeleton. 
97 | let mut contents = String::new(); 98 | File::open(skel) 99 | .unwrap() 100 | .read_to_string(&mut contents) 101 | .unwrap(); 102 | let new_contents = format!("#![allow(clippy::derive_partial_eq_without_eq)]\n{contents}"); 103 | File::create(skel) 104 | .unwrap() 105 | .write_all(new_contents.as_bytes()) 106 | .unwrap(); 107 | 108 | println!("cargo:rerun-if-changed={PYTHON_STACK_SOURCE}"); 109 | println!("cargo:rerun-if-changed={PYTHON_STACK_HEADER}"); 110 | } 111 | -------------------------------------------------------------------------------- /python.gdbinit: -------------------------------------------------------------------------------- 1 | # -*- ksh -*- 2 | # 3 | # If you use the GNU debugger gdb to debug the Python C runtime, you 4 | # might find some of the following commands useful. Copy this to your 5 | # ~/.gdbinit file and it'll get loaded into gdb automatically when you 6 | # start it up. Then, at the gdb prompt you can do things like: 7 | # 8 | # (gdb) pyo apyobjectptr 9 | # 10 | # refcounts: 1 11 | # address : 84a7a2c 12 | # $1 = void 13 | # (gdb) 14 | 15 | # Prints a representation of the object to stderr, along with the 16 | # number of reference counts it currently has and the hex address the 17 | # object is allocated at. The argument must be a PyObject* 18 | define pyo 19 | print _PyObject_Dump($arg0) 20 | end 21 | 22 | # Prints a representation of the object to stderr, along with the 23 | # number of reference counts it currently has and the hex address the 24 | # object is allocated at.
The argument must be a PyGC_Head* 25 | define pyg 26 | print _PyGC_Dump($arg0) 27 | end 28 | 29 | # print the local variables of the current frame 30 | define pylocals 31 | set $_i = 0 32 | while $_i < f->f_code->co_nlocals 33 | if f->f_localsplus + $_i != 0 34 | set $_names = co->co_varnames 35 | set $_name = PyString_AsString(PyTuple_GetItem($_names, $_i)) 36 | printf "%s:\n", $_name 37 | # side effect of calling _PyObject_Dump is to dump the object's 38 | # info - assigning just prevents gdb from printing the 39 | # NULL return value 40 | set $_val = _PyObject_Dump(f->f_localsplus[$_i]) 41 | end 42 | set $_i = $_i + 1 43 | end 44 | end 45 | 46 | # A rewrite of the Python interpreter's line number calculator in GDB's 47 | # command language 48 | define lineno 49 | set $__continue = 1 50 | set $__co = f->f_code 51 | set $__lasti = f->f_lasti 52 | set $__sz = ((PyStringObject *)$__co->co_lnotab)->ob_size/2 53 | set $__p = (unsigned char *)((PyStringObject *)$__co->co_lnotab)->ob_sval 54 | set $__li = $__co->co_firstlineno 55 | set $__ad = 0 56 | while ($__sz-1 >= 0 && $__continue) 57 | set $__sz = $__sz - 1 58 | set $__ad = $__ad + *$__p 59 | set $__p = $__p + 1 60 | if ($__ad > $__lasti) 61 | set $__continue = 0 62 | end 63 | set $__li = $__li + *$__p 64 | set $__p = $__p + 1 65 | end 66 | printf "%d", $__li 67 | end 68 | 69 | # print the current frame - verbose 70 | define pyframev 71 | pyframe 72 | pylocals 73 | end 74 | 75 | define pyframe 76 | set $__fn = (char *)((PyStringObject *)co->co_filename)->ob_sval 77 | set $__n = (char *)((PyStringObject *)co->co_name)->ob_sval 78 | printf "%s (", $__fn 79 | lineno 80 | printf "): %s\n", $__n 81 | ### Uncomment these lines when using from within Emacs/XEmacs so it will 82 | ### automatically track/display the current Python source line 83 | # printf "%c%c%s:", 032, 032, $__fn 84 | # lineno 85 | # printf ":1\n" 86 | end 87 | 88 | ### Use these at your own risk. 
It appears that a bug in gdb causes it 89 | ### to crash in certain circumstances. 90 | 91 | #define up 92 | # up-silently 1 93 | # printframe 94 | #end 95 | 96 | #define down 97 | # down-silently 1 98 | # printframe 99 | #end 100 | 101 | define printframe 102 | if $pc > PyEval_EvalFrameEx && $pc < PyEval_EvalCodeEx 103 | pyframe 104 | else 105 | frame 106 | end 107 | end 108 | 109 | # Here's a somewhat fragile way to print the entire Python stack from gdb. 110 | # It's fragile because the tests for the value of $pc depend on the layout 111 | # of specific functions in the C source code. 112 | 113 | # Explanation of while and if tests: We want to pop up the stack until we 114 | # land in Py_Main (this is probably an incorrect assumption in an embedded 115 | # interpreter, but the test can be extended by an interested party). If 116 | # Py_Main <= $pc <= Py_GetArgcArgv is true, $pc is in Py_Main(), so the while 117 | # test succeeds as long as it's not true. In a similar fashion the if 118 | # statement tests to see if we are in PyEval_EvalFrameEx(). 119 | 120 | # Note: The names of the main interpreter function and the function which 121 | # follows it have changed over time. This version of pystack works with this 122 | # version of Python. If you try using it with older or newer versions of 123 | # the interpreter you may have to change the functions you compare with 124 | # $pc.
125 | 126 | # print the entire Python call stack 127 | define pystack 128 | while $pc < Py_Main || $pc > Py_GetArgcArgv 129 | if $pc > PyEval_EvalFrameEx && $pc < PyEval_EvalCodeEx 130 | pyframe 131 | end 132 | up-silently 1 133 | end 134 | select-frame 0 135 | end 136 | 137 | # print the entire Python call stack - verbose mode 138 | define pystackv 139 | while $pc < Py_Main || $pc > Py_GetArgcArgv 140 | if $pc > PyEval_EvalFrameEx && $pc < PyEval_EvalCodeEx 141 | pyframev 142 | end 143 | up-silently 1 144 | end 145 | select-frame 0 146 | end 147 | 148 | # generally useful macro to print a Unicode string 149 | def pu 150 | set $uni = $arg0 151 | set $i = 0 152 | while (*$uni && $i++<100) 153 | if (*$uni < 0x80) 154 | print *(char*)$uni++ 155 | else 156 | print /x *(short*)$uni++ 157 | end 158 | end 159 | end 160 | -------------------------------------------------------------------------------- /scripts/download_and_build_static_libs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2022 The rbperf authors 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 
7 | set -o errexit -o nounset -o pipefail # every option needs its own -o; bare words after the first only become positional parameters 8 | 9 | NPROC=$(nproc --all) 10 | ELFUTILS_VERSION="0.188" 11 | ELFUTILS_SHA_512="585551b2d937d19d1becfc2f28935db1dd1a3d25571a62f322b70ac8da98c1a741a55d070327705df6c3e2ee026652e0b9a3c733b050a0b0ec5f2fc75d5b74b5" 12 | 13 | ZLIB_VERSION="1.2.13" 14 | ZLIB_SHA256="b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30" 15 | 16 | run() { 17 | "$@" 2>&1 18 | } 19 | 20 | mkdir -p target/static-libs 21 | mkdir -p target/static-libs/libz 22 | mkdir -p target/static-libs/elfutils 23 | STATIC_LIBS_OUT_PATH="${PWD}/target/static-libs" 24 | 25 | run pushd "${STATIC_LIBS_OUT_PATH}" 26 | 27 | # Notes: 28 | # * -fpic is not the same as -FPIC 29 | # https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html 30 | # 31 | # * cflags required for clang to compile elfutils 32 | export CFLAGS="-fno-omit-frame-pointer -fpic -Wno-gnu-variable-sized-type-not-at-end -Wno-unused-but-set-parameter" 33 | export CC=clang 34 | 35 | echo "=> Building elfutils" 36 | run curl -L -O "https://sourceware.org/pub/elfutils/${ELFUTILS_VERSION}/elfutils-${ELFUTILS_VERSION}.tar.bz2" 37 | if ! sha512sum "elfutils-${ELFUTILS_VERSION}.tar.bz2" | grep -q "$ELFUTILS_SHA_512"; then 38 | echo "Checksum for elfutils doesn't match" 39 | exit 1 40 | fi 41 | 42 | run tar xjf "elfutils-${ELFUTILS_VERSION}.tar.bz2" 43 | 44 | run pushd "elfutils-${ELFUTILS_VERSION}" 45 | run ./configure --prefix="${STATIC_LIBS_OUT_PATH}/elfutils" --disable-debuginfod --disable-libdebuginfod 46 | 47 | run make "-j${NPROC}" 48 | run make install 49 | cp "${STATIC_LIBS_OUT_PATH}/elfutils/lib/libelf.a" "${STATIC_LIBS_OUT_PATH}" 50 | run popd 51 | 52 | echo "=> Building zlib" 53 | run curl -L -O "https://zlib.net/zlib-${ZLIB_VERSION}.tar.gz" 54 | if !
sha256sum "zlib-${ZLIB_VERSION}.tar.gz" | grep -q "$ZLIB_SHA256"; then 55 | echo "Checksum for zlib doesn't match" 56 | exit 1 57 | fi 58 | run tar xzf zlib-${ZLIB_VERSION}.tar.gz 59 | 60 | run pushd "zlib-${ZLIB_VERSION}" 61 | run ./configure --prefix="${STATIC_LIBS_OUT_PATH}/libz" >/dev/null 62 | run make "-j${NPROC}" >/dev/null 63 | run make install >/dev/null 64 | cp "${STATIC_LIBS_OUT_PATH}/libz/lib/libz.a" "${STATIC_LIBS_OUT_PATH}" 65 | run popd 66 | 67 | run popd 68 | -------------------------------------------------------------------------------- /src/arch.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 The rbperf authors 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | /// Returns `true`: this definition is the one compiled for x86 and x86_64 targets. 7 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 8 | #[must_use] 9 | pub const fn is_x86() -> bool { 10 | true 11 | } 12 | 13 | /// Returns `false`: this definition is the one compiled for every non-x86 target. 14 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] 15 | #[must_use] 16 | pub const fn is_x86() -> bool { 17 | false 18 | } 19 | -------------------------------------------------------------------------------- /src/bindings/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![allow(clippy::too_many_lines)] 5 | include!(concat!(env!("OUT_DIR"), "/bindings.rs")); 6 | -------------------------------------------------------------------------------- /src/bpf/.gitignore: -------------------------------------------------------------------------------- 1 | others.h 2 | -------------------------------------------------------------------------------- /src/bpf/basic_types.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | // 6 | // Copyright (c) 2022 The rbperf authors 7 | // Copyright (c) 2023 The py-perf authors 8 | 9 | typedef signed char __s8; 10 | typedef unsigned char __u8; 11 | typedef short int __s16; 12 | typedef short unsigned int __u16; 13 | typedef int __s32; 14 | typedef unsigned int __u32; 15 | typedef long long int __s64; 16 | typedef long long unsigned int __u64; 17 | 18 | typedef __s8 s8; 19 | typedef __u8 u8; 20 | typedef __s16 s16; 21 | typedef __u16 u16; 22 | typedef __s32 s32; 23 | typedef __u32 u32; 24 | typedef __s64 s64; 25 | typedef __u64 u64; 26 | 27 | typedef int pid_t; 28 | typedef unsigned int uid_t; 29 | typedef unsigned int gid_t; 30 | -------------------------------------------------------------------------------- /src/bpf/mod.rs: -------------------------------------------------------------------------------- 1 | #[allow(clippy::all)] 2 | #[allow(clippy::missing_const_for_fn)] 3 | #[allow(clippy::non_send_fields_in_send_ty)] 4 | pub mod pyperf; 5 | -------------------------------------------------------------------------------- /src/bpf/pyperf.bpf.c: -------------------------------------------------------------------------------- 1 | #include "pyperf.h" 2 | #include "vmlinux.h" 3 | 4 | #include <bpf/bpf_core_read.h> 5 | #include <bpf/bpf_helpers.h> 6 | #include <bpf/bpf_tracing.h> 7 | 8 | // 9 | // ╔═════════════════════════════════════════════════════════════════════════╗ 10 | // ║ Constants and Configuration ║ 11 | // ╚═════════════════════════════════════════════════════════════════════════╝ 12 | // 13 | const volatile bool verbose = false; 14 | 15 | #define MAX_STACK_DEPTH 127 16 | #define MAX_STACK_TRACES_ENTRIES 64000 17 | #define MAX_STACK_COUNTS_ENTRIES 10240 18 | 19 | // 20 | // ╔═════════════════════════════════════════════════════════════════════════╗ 21 | // ║ Type Definitions ║ 22 | //
╚═════════════════════════════════════════════════════════════════════════╝ 23 | // 24 | 25 | #define EVENT_COMM_LEN 16 26 | #define EVENT_MSG_LEN 256 27 | 28 | // TODO(kakkoyun): Remove or use! 29 | struct event { 30 | pid_t tid; 31 | pid_t pid; 32 | uid_t uid; 33 | u8 comm[EVENT_COMM_LEN]; 34 | int kernel_stack_id; 35 | int user_stack_id; 36 | u8 msg[EVENT_MSG_LEN]; 37 | }; 38 | 39 | // Dummy instance for skeleton to generate definition. 40 | struct event _event = {}; 41 | 42 | // 43 | // ╔═════════════════════════════════════════════════════════════════════════╗ 44 | // ║ Macros ║ 45 | // ╚═════════════════════════════════════════════════════════════════════════╝ 46 | // 47 | // TODO(kakkoyun): Remove cluttering abstractions. 48 | // TODO(kakkoyun): Remove macros. 49 | #define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \ 50 | struct { \ 51 | __uint(type, _type); \ 52 | __uint(max_entries, _max_entries); \ 53 | __type(key, _key_type); \ 54 | __type(value, _value_type); \ 55 | } _name SEC(".maps"); 56 | 57 | #define BPF_HASH(_name, _key_type, _value_type, _max_entries) \ 58 | BPF_MAP(_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, _max_entries); 59 | 60 | typedef u64 stack_trace_type[MAX_STACK_DEPTH]; 61 | #define BPF_STACK_TRACE(_name, _max_entries) \ 62 | BPF_MAP(_name, BPF_MAP_TYPE_STACK_TRACE, u32, stack_trace_type, _max_entries); 63 | 64 | // 65 | // ╔═════════════════════════════════════════════════════════════════════════╗ 66 | // ║ BPF Maps ║ 67 | // ╚═════════════════════════════════════════════════════════════════════════╝ 68 | // 69 | 70 | struct { 71 | __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 72 | __uint(max_entries, 3); 73 | __type(key, u32); 74 | __type(value, u32); 75 | } programs SEC(".maps"); 76 | 77 | struct { 78 | __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 79 | __uint(key_size, sizeof(u32)); 80 | __uint(value_size, sizeof(u32)); 81 | __uint(max_entries, 8192); 82 | } events SEC(".maps"); 83 | 84 | // struct { 85 | // 
__uint(type, BPF_MAP_TYPE_RINGBUF); 86 | // __uint(max_entries, 8192); 87 | // } events SEC(".maps"); 88 | 89 | struct { 90 | __uint(type, BPF_MAP_TYPE_HASH); 91 | __uint(max_entries, 4096); 92 | __type(key, pid_t); 93 | __type(value, ProcessInfo); 94 | } pid_to_process_info SEC(".maps"); 95 | 96 | struct { 97 | __uint(type, BPF_MAP_TYPE_HASH); 98 | __uint(max_entries, 10); 99 | __type(key, u32); 100 | __type(value, PythonVersionOffsets); 101 | } version_specific_offsets SEC(".maps"); 102 | 103 | // struct { 104 | // __uint(type, BPF_MAP_TYPE_HASH); 105 | // __uint(max_entries, 1); 106 | // __type(key, int); 107 | // __type(value, event); 108 | // } eventmap SEC(".maps"); 109 | 110 | // TODO(kakkoyun): Rename to sample! 111 | struct { 112 | __uint(type, BPF_MAP_TYPE_HASH); 113 | __uint(max_entries, 1); 114 | __type(key, int); 115 | __type(value, Stack); 116 | } stackmap SEC(".maps"); 117 | 118 | struct { 119 | __uint(type, BPF_MAP_TYPE_ARRAY); 120 | __uint(max_entries, 1); 121 | __type(key, u32); 122 | __type(value, u64); 123 | } symbol_index SEC(".maps"); 124 | 125 | struct { 126 | __uint(type, BPF_MAP_TYPE_HASH); 127 | __uint(max_entries, 64000); 128 | __type(key, Symbol); 129 | __type(value, int); 130 | } symbols SEC(".maps"); 131 | 132 | BPF_STACK_TRACE(stack_traces, MAX_STACK_TRACES_ENTRIES); 133 | BPF_HASH(stack_counts, stack_count_key_t, u64, MAX_STACK_COUNTS_ENTRIES); 134 | 135 | struct { 136 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 137 | __uint(max_entries, 1); 138 | __type(key, u32); 139 | __type(value, State); 140 | } global_state SEC(".maps"); 141 | 142 | // 143 | // ╔═════════════════════════════════════════════════════════════════════════╗ 144 | // ║ Generic Helpers ║ 145 | // ╚═════════════════════════════════════════════════════════════════════════╝ 146 | // 147 | 148 | #define GET_STATE() \ 149 | int zero = 0; \ 150 | State *state = bpf_map_lookup_elem(&global_state, &zero); \ 151 | if (state == NULL) { \ 152 | return 0; \ 153 | } 154 | 155 | 
#define GET_OFFSETS() \ 156 | PythonVersionOffsets *offsets = bpf_map_lookup_elem(&version_specific_offsets, &state->process_info.py_version); \ 157 | if (offsets == NULL) { \ 158 | return 0; \ 159 | } 160 | 161 | #define LOG(fmt, ...) \ 162 | ({ \ 163 | if (verbose) { \ 164 | bpf_printk(fmt, ##__VA_ARGS__); \ 165 | } \ 166 | }) 167 | 168 | static __always_inline void *bpf_map_lookup_or_try_init(void *map, const void *key, const void *init) { 169 | void *val; 170 | long err; 171 | 172 | val = bpf_map_lookup_elem(map, key); 173 | if (val) { 174 | return val; 175 | } 176 | 177 | err = bpf_map_update_elem(map, key, init, BPF_NOEXIST); 178 | if (err) { 179 | LOG("[error] bpf_map_lookup_or_try_init with ret: %d", err); 180 | return 0; 181 | } 182 | 183 | return bpf_map_lookup_elem(map, key); 184 | } 185 | 186 | static inline __attribute__((__always_inline__)) int submit_sample(struct bpf_perf_event_data *ctx, State *state) { 187 | LOG("[stop]"); 188 | LOG(""); 189 | // bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); 190 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &state->sample, sizeof(state->sample)); 191 | // bpf_ringbuf_submit(stack, 0); 192 | // bpf_ringbuf_output(&events, &stack, sizeof(stack), 0); 193 | return 0; 194 | } 195 | 196 | // static inline __attribute__((__always_inline__)) int 197 | // submit_event(struct bpf_perf_event_data *ctx, struct event *event) { 198 | // bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); 199 | // // bpf_ringbuf_submit(event, 0); 200 | // // bpf_ringbuf_output(&events, &event, sizeof(event), 0); 201 | // return 0; 202 | // } 203 | 204 | // 205 | // ╔═════════════════════════════════════════════════════════════════════════╗ 206 | // ║ Runtime Helpers ║ 207 | // ╚═════════════════════════════════════════════════════════════════════════╝ 208 | // 209 | 210 | // static inline __attribute__((__always_inline__)) void * 211 | // get_interpreter(ProcessInfo 
*process_info) { 212 | 213 | // } 214 | 215 | // static inline __attribute__((__always_inline__)) void * 216 | // get_thread_state(ProcessInfo *process_info) { 217 | // } 218 | 219 | // 220 | // ╔═════════════════════════════════════════════════════════════════════════╗ 221 | // ║ BPF Programs ║ 222 | // ╚═════════════════════════════════════════════════════════════════════════╝ 223 | // 224 | SEC("perf_event") 225 | int on_event(struct bpf_perf_event_data *ctx) { 226 | u64 pid_tgid = bpf_get_current_pid_tgid(); 227 | pid_t pid = pid_tgid >> 32; 228 | pid_t tid = pid_tgid; 229 | 230 | if (pid == 0) { 231 | return 0; 232 | } 233 | 234 | ProcessInfo *process_info = bpf_map_lookup_elem(&pid_to_process_info, &pid); 235 | if (!process_info) { 236 | return 0; 237 | } 238 | 239 | LOG("[start]"); 240 | LOG("[event] pid=%d tid=%d", pid, tid); 241 | 242 | if (process_info->thread_state_addr == 0) { 243 | LOG("[error] process_info.thread_state_addr was NULL"); 244 | return 0; 245 | } 246 | 247 | // TODO(kakkoyun): Do or do not there is no try! 248 | // struct event *event = bpf_ringbuf_reserve(&events, sizeof(sample), 0); 249 | 250 | GET_STATE(); 251 | // Reset state. 
252 | state->process_info = (ProcessInfo){0}; 253 | state->process_info = *process_info; 254 | // state->interpreter = 0; 255 | state->thread_state = 0; 256 | 257 | // state->base_stack = 0; 258 | // state->cfp = 0; 259 | state->frame_ptr = 0; 260 | state->stack_walker_prog_call_count = 0; 261 | 262 | state->sample = (Sample){0}; 263 | state->sample.timestamp = bpf_ktime_get_ns(); 264 | state->sample.tid = tid; 265 | state->sample.pid = pid; 266 | state->sample.cpu = bpf_get_smp_processor_id(); 267 | bpf_get_current_comm(&state->sample.comm, sizeof(state->sample.comm)); 268 | state->sample.native_stack_count_key = (stack_count_key_t){ 269 | .pid = pid, 270 | .tid = tid, 271 | .kernel_stack_id = bpf_get_stackid(ctx, &stack_traces, 0), 272 | .user_stack_id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK), 273 | }; 274 | state->sample.stack_status = STACK_ERROR; 275 | state->sample.error_code = ERROR_NONE; 276 | 277 | state->sample.stack = (Stack){0}; 278 | state->sample.stack.len = 0; 279 | __builtin_memset((void *)state->sample.stack.frames, 0, sizeof(state->sample.stack.frames)); 280 | 281 | u64 *scount = bpf_map_lookup_or_try_init(&stack_counts, &state->sample.native_stack_count_key, &zero); 282 | if (scount) { 283 | __sync_fetch_and_add(scount, 1); 284 | } 285 | 286 | // Fetch interpreter head. 287 | 288 | // LOG("process_info->interpreter_addr 0x%llx", process_info->interpreter_addr); 289 | // bpf_probe_read_user(&state->interpreter, 290 | // sizeof(state->interpreter), 291 | // (void *)(long)process_info->interpreter_addr); 292 | // LOG("interpreter 0x%llx", state->interpreter); 293 | 294 | // Fetch thread state. 
295 | 296 | // GDB: ((PyThreadState *)_PyRuntime.gilstate.tstate_current) 297 | bpf_probe_read_user(&state->thread_state, sizeof(state->thread_state), 298 | (void *)(long)process_info->thread_state_addr); 299 | LOG("process_info->thread_state_addr 0x%llx", process_info->thread_state_addr); 300 | LOG("thread_state 0x%llx", state->thread_state); 301 | 302 | // Read PyThreadState of this Thread from TLS. 303 | // void *thread_state = get_thread_state(tls_base, process_info); 304 | // if (!thread_state) { 305 | // LOG("[error] thread_state was NULL"); 306 | // goto submit_event; 307 | // } 308 | 309 | // TODO(kakkoyun): THREAD STATE MATCH. 310 | // Check for matching between TLS PyThreadState and 311 | // the global _PyThreadState_Current. 312 | // event->thread_state_match = 313 | // get_thread_state_match(thread_state, thread_state_current); 314 | 315 | // Read pthread ID of this Thread from TLS. 316 | 317 | // TODO(kakkoyun): Add function to get tls_base/fs_base. 318 | struct task_struct *task = (struct task_struct *)bpf_get_current_task(); 319 | // This changes depending on arch and kernel version. 320 | // task->thread.fs, task->thread.tp_value, etc. 
321 | long unsigned int tls_base = BPF_CORE_READ(task, thread.fsbase); 322 | LOG("tls_base 0x%llx", (void *)tls_base); 323 | 324 | GET_OFFSETS(); 325 | 326 | // s64 thread_id; 327 | void *pthread_self, *pthread_created; 328 | bpf_probe_read_user(&pthread_created, sizeof(pthread_created), 329 | state->thread_state + offsets->py_thread_state.thread_id); 330 | if (pthread_created == 0) { 331 | LOG("[error] pthread_created was NULL"); 332 | goto submit_event; 333 | } 334 | LOG("pthread_created 0x%llx", pthread_created); 335 | // For __x86_64__, GLIBC 336 | // 0x10 = offsetof(struct pthread, header.self) 337 | // 0x10 = offsetof(tcbhead_t, self) 338 | bpf_probe_read_user(&pthread_self, sizeof(pthread_self), (void *)tls_base + 0x10); 339 | if (pthread_self == 0) { 340 | LOG("[error] pthread_self was NULL"); 341 | goto submit_event; 342 | } 343 | LOG("pthread_self 0x%llx", pthread_self); 344 | 345 | // TODO(kakkoyun): PTHREAD ID MATCH. 346 | // Check for matching between pthread ID created current PyThreadState and 347 | // pthread of actual current pthread. 348 | // event->pthread_id_match = 349 | // get_pthread_id_match(thread_state, tls_base, pid_data); 350 | 351 | // TODO(kakkoyun): GIL. 352 | // p (PyThreadState *)PyThread_tss_get(&_PyRuntime.gilstate.autoTSSkey) 353 | // // Read GIL state 354 | // event->gil_state = 355 | // get_gil_state(thread_state, thread_state_current, pid_data); 356 | 357 | // TODO(kakkoyun): FRAME POINTER. 358 | if (state->thread_state == 0) { 359 | LOG("[error] thread_state was NULL"); 360 | goto submit_event; 361 | } 362 | 363 | // TODO(kakkoyun): Better to check version. 364 | // Get pointer to top frame from PyThreadState. 365 | if (offsets->py_thread_state.frame > -1) { 366 | // TODO(kakkoyun): Maybe do this in user-space?! 
367 | bpf_probe_read_user(&state->frame_ptr, sizeof(void *), state->thread_state + offsets->py_thread_state.frame); 368 | } else { 369 | void *cframe; 370 | bpf_probe_read_user(&cframe, sizeof(cframe), (void *)(state->thread_state + offsets->py_thread_state.cframe)); 371 | if (cframe == 0) { 372 | LOG("[error] cframe was NULL"); 373 | state->sample.error_code = ERROR_TSTATE_CFRAME_IS_NULL; 374 | goto submit_event; 375 | } 376 | LOG("cframe 0x%llx", cframe); 377 | 378 | bpf_probe_read_user(&state->frame_ptr, sizeof(state->frame_ptr), 379 | (void *)(cframe + offsets->py_cframe.current_frame)); 380 | } 381 | if (state->frame_ptr == 0) { 382 | LOG("[error] frame_ptr was NULL"); 383 | state->sample.error_code = ERROR_EMPTY_STACK; 384 | goto submit_event; 385 | } 386 | 387 | LOG("frame_ptr 0x%llx", state->frame_ptr); 388 | bpf_tail_call(ctx, &programs, PYPERF_STACK_WALKING_PROGRAM_IDX); 389 | // bpf_tail_call(ctx, &programs, PYPERF_THREAD_STATE_PROGRAM_IDX); 390 | // This will never be executed. 391 | 392 | submit_event: 393 | // TODO(kakkoyun): To tag or not to tag?! 394 | submit_sample(ctx, state); 395 | return 0; 396 | } 397 | 398 | static inline __attribute__((__always_inline__)) u64 get_symbol_id(Symbol *sym) { 399 | int *symbol_id_ptr = bpf_map_lookup_elem(&symbols, sym); 400 | if (symbol_id_ptr) { 401 | return *symbol_id_ptr; 402 | } 403 | 404 | u32 zero = 0; 405 | u64 *sym_idx = bpf_map_lookup_elem(&symbol_index, &zero); 406 | if (sym_idx == NULL) { 407 | // Appease the verifier, this will never fail. 408 | return 0; 409 | } 410 | 411 | u64 idx = __sync_fetch_and_add(sym_idx, 1); 412 | int err; 413 | err = bpf_map_update_elem(&symbols, sym, &idx, BPF_ANY); 414 | if (err) { 415 | LOG("[error] symbols failed with %d", err); 416 | } 417 | return idx; 418 | } 419 | 420 | // TODO(kakkoyun): 421 | // ! Improve this function. 422 | // * Add error handling. 423 | // * Make sure we don't miss an edge case. 
424 | static inline __attribute__((__always_inline__)) void read_symbol(PythonVersionOffsets *offsets, void *cur_frame, 425 | void *code_ptr, Symbol *symbol) { 426 | // Figure out if we want to parse class name, basically checking the name of 427 | // the first argument. 428 | // If it's 'self', we get the type and it's name, if it's cls, we just get 429 | // the name. This is not perfect but there is no better way to figure this 430 | // out from the code object. 431 | // Everything we do in this function is best effort, we don't want to fail 432 | // the program if we can't read something. 433 | 434 | // GDB: ((PyTupleObject*)$frame->f_code->co_varnames)->ob_item[0] 435 | void *args_ptr; 436 | bpf_probe_read_user(&args_ptr, sizeof(void *), code_ptr + offsets->py_code_object.co_varnames); 437 | bpf_probe_read_user(&args_ptr, sizeof(void *), args_ptr + offsets->py_tuple_object.ob_item); 438 | bpf_probe_read_user_str(&symbol->func, sizeof(symbol->func), args_ptr + offsets->py_string.data); 439 | 440 | // Compare strings as ints to save instructions. 441 | char self_str[4] = {'s', 'e', 'l', 'f'}; 442 | char cls_str[4] = {'c', 'l', 's', '\0'}; 443 | bool first_self = *(s32 *)symbol->func == *(s32 *)self_str; 444 | bool first_cls = *(s32 *)symbol->func == *(s32 *)cls_str; 445 | 446 | // GDB: $frame->f_localsplus[0]->ob_type->tp_name. 447 | if (first_self || first_cls) { 448 | void *ptr; 449 | bpf_probe_read_user(&ptr, sizeof(void *), cur_frame + offsets->py_frame_object.f_localsplus); 450 | if (first_self) { 451 | // We are working with an instance, first we need to get type. 
452 | bpf_probe_read_user(&ptr, sizeof(void *), ptr + offsets->py_object.ob_type); 453 | } 454 | bpf_probe_read_user(&ptr, sizeof(void *), ptr + offsets->py_type_object.tp_name); 455 | bpf_probe_read_user_str(&symbol->class, sizeof(symbol->class), ptr); 456 | } 457 | 458 | void *pystr_ptr; 459 | 460 | // GDB: $frame->f_code->co_filename 461 | bpf_probe_read_user(&pystr_ptr, sizeof(void *), code_ptr + offsets->py_code_object.co_filename); 462 | bpf_probe_read_user_str(&symbol->file, sizeof(symbol->file), pystr_ptr + offsets->py_string.data); 463 | 464 | // GDB: $frame->f_code->co_name 465 | bpf_probe_read_user(&pystr_ptr, sizeof(void *), code_ptr + offsets->py_code_object.co_name); 466 | bpf_probe_read_user_str(&symbol->func, sizeof(symbol->func), pystr_ptr + offsets->py_string.data); 467 | 468 | // GDB: $frame->f_code->co_firstlineno 469 | bpf_probe_read_user(&symbol->line, sizeof(symbol->line), code_ptr + offsets->py_code_object.co_firstlineno); 470 | } 471 | 472 | // TODO(kakkoyun): Decide. 473 | // #define FAIL_COMPILATION_IF(condition) \ 474 | // typedef struct { \ 475 | // char _condition_check[1 - 2 * !!(condition)]; \ 476 | // } STR_CONCAT(compile_time_condition_check, __COUNTER__); 477 | 478 | // FAIL_COMPILATION_IF(sizeof(Symbol) == sizeof(struct bpf_perf_event_value)) 479 | 480 | static inline __attribute__((__always_inline__)) void reset_symbol(Symbol *sym) { 481 | __builtin_memset((void *)sym, 0, sizeof(Symbol)); 482 | 483 | // We re-use the same Symbol instance across loop iterations, which means 484 | // we will have left-over data in the struct. Although this won't affect 485 | // correctness of the result because we have '\0' at end of the strings read, 486 | // it would affect effectiveness of the deduplication. 487 | // Helper bpf_perf_prog_read_value clears the buffer on error, so here we 488 | // (ab)use this behavior to clear the memory. 
It requires the size of Symbol 489 | // to be different from struct bpf_perf_event_value, which we check at 490 | // compilation time using the FAIL_COMPILATION_IF macro. 491 | // bpf_perf_prog_read_value(ctx, (struct bpf_perf_event_value *)sym, sizeof(Symbol)); 492 | 493 | // sym->fn[0] = '\0'; 494 | // sym->class[0] = '\0'; 495 | // sym->file[0] = '\0'; 496 | } 497 | 498 | SEC("perf_event") 499 | int walk_python_stack(struct bpf_perf_event_data *ctx) { 500 | GET_STATE(); 501 | GET_OFFSETS(); 502 | 503 | LOG("[start] walk_python_stack"); 504 | state->stack_walker_prog_call_count++; 505 | Sample *sample = &state->sample; 506 | 507 | // TODO(kakkoyun): Remove after testing. 508 | int frame_count = 0; 509 | #pragma unroll 510 | for (int i = 0; i < PYTHON_STACK_FRAMES_PER_PROG; i++) { 511 | void *cur_frame = state->frame_ptr; 512 | if (!cur_frame) { 513 | break; 514 | } 515 | 516 | // Read the code pointer. PyFrameObject.f_code 517 | void *cur_code_ptr; 518 | bpf_probe_read_user(&cur_code_ptr, sizeof(cur_code_ptr), state->frame_ptr + offsets->py_frame_object.f_code); 519 | if (!cur_code_ptr) { 520 | LOG("[error] bpf_probe_read_user failed"); 521 | break; 522 | } 523 | 524 | LOG("frame %d", frame_count); 525 | LOG("cur_frame_ptr 0x%llx", cur_frame); 526 | LOG("cur_code_ptr 0x%llx", cur_code_ptr); 527 | 528 | Symbol sym = (Symbol){0}; 529 | reset_symbol(&sym); 530 | 531 | // Read symbol information from the code object if possible. 
532 | read_symbol(offsets, cur_frame, cur_code_ptr, &sym); 533 | 534 | LOG("sym.file %s", sym.file); 535 | LOG("sym.class %s", sym.class); 536 | LOG("sym.fn %s", sym.func); 537 | LOG("sym.line %d", sym.line); 538 | 539 | u32 symbol_id = get_symbol_id(&sym); 540 | s64 cur_len = sample->stack.len; 541 | if (cur_len >= 0 && cur_len < STACK_MAX_LEN) { 542 | LOG("stack->frames[%d] = %d", cur_len, symbol_id); 543 | sample->stack.frames[cur_len] = symbol_id; 544 | sample->stack.len++; 545 | } 546 | frame_count++; 547 | 548 | bpf_probe_read_user(&state->frame_ptr, sizeof(state->frame_ptr), cur_frame + offsets->py_frame_object.f_back); 549 | if (!state->frame_ptr) { 550 | // There aren't any frames to read. We are done. 551 | goto complete; 552 | } 553 | } 554 | LOG("[iteration] frame_count %d", frame_count); 555 | 556 | LOG("state->stack_walker_prog_call_count %d", state->stack_walker_prog_call_count); 557 | if (state->stack_walker_prog_call_count < PYTHON_STACK_PROG_CNT) { 558 | LOG("[continue] walk_python_stack"); 559 | bpf_tail_call(ctx, &programs, PYPERF_STACK_WALKING_PROGRAM_IDX); 560 | state->sample.error_code = ERROR_CALL_FAILED; 561 | goto submit; 562 | } 563 | 564 | LOG("[error] walk_python_stack TRUNCATED"); 565 | LOG("[truncated] walk_python_stack, stack_len=%d", sample->stack.len); 566 | state->sample.error_code = ERROR_NONE; 567 | state->sample.stack_status = STACK_TRUNCATED; 568 | goto submit; 569 | 570 | complete: 571 | LOG("[complete] walk_python_stack, stack_len=%d", sample->stack.len); 572 | state->sample.error_code = ERROR_NONE; 573 | state->sample.stack_status = STACK_COMPLETE; 574 | submit: 575 | LOG("[stop] walk_python_stack"); 576 | submit_sample(ctx, state); 577 | return 0; 578 | } 579 | 580 | // 581 | // ╔═════════════════════════════════════════════════════════════════════════╗ 582 | // ║ Metadata ║ 583 | // ╚═════════════════════════════════════════════════════════════════════════╝ 584 | // 585 | #define KBUILD_MODNAME "py-perf" 586 | volatile 
#include "basic_types.h"

// Slot of the stack-walking program in the tail-call program array
// (used as the index passed to bpf_tail_call and filled in from user space).
#define PYPERF_STACK_WALKING_PROGRAM_IDX 0
// #define PYPERF_THREAD_STATE_PROGRAM_IDX 1

// Maximum Python stack frames: 16x5 = 80
// (frames handled per program invocation x number of chained invocations).
#define PYTHON_STACK_FRAMES_PER_PROG 16
#define PYTHON_STACK_PROG_CNT 5
#define STACK_MAX_LEN (PYTHON_STACK_FRAMES_PER_PROG * PYTHON_STACK_PROG_CNT)
// rbperf
// #define MAX_STACKS_PER_PROGRAM 30
// #define BPF_PROGRAMS_COUNT 25
// #define MAX_STACK (MAX_STACKS_PER_PROGRAM * BPF_PROGRAMS_COUNT)

// The typedefs below are NOT the CPython structures themselves: every s64 member
// holds the byte offset of the like-named field inside the corresponding CPython
// structure for one specific Python version.

// Offsets into a generic Python object header.
typedef struct {
    s64 ob_type;
} PyObject;

// Offsets into a Python string object (see note 1 in the comment block below).
typedef struct {
    s64 data;
    s64 size;
} PyString;

// Offsets into a Python type object.
typedef struct {
    s64 tp_name;
} PyTypeObject;

// Offsets into the per-thread interpreter state.
typedef struct {
    s64 next;
    s64 interp;
    s64 frame;
    s64 thread_id;
    s64 native_thread_id;

    s64 cframe;
} PyThreadState;

typedef struct {
    // since Python 3.11 this structure holds pointer to target FrameObject.
    s64 current_frame;
} PyCFrame;

// Offsets into the interpreter state.
typedef struct {
    s64 tstate_head;
} PyInterpreterState;

// Offsets into the runtime state (see note 2 in the comment block below).
typedef struct {
    s64 interp_main;
} PyRuntimeState;

// Offsets into a frame object; f_code and f_back are the ones followed while
// walking the stack.
typedef struct {
    s64 f_back;
    s64 f_code;
    s64 f_lineno;
    s64 f_localsplus;
} PyFrameObject;

// Offsets into a code object.
typedef struct {
    s64 co_filename;
    s64 co_name;
    s64 co_varnames;
    s64 co_firstlineno;
} PyCodeObject;

// Offsets into a tuple object.
typedef struct {
    s64 ob_item;
} PyTupleObject;

// Offsets of structures across different Python versions:

// For the most part, these fields are named after their corresponding structures in Python.
// They are depicted as structures with 64-bit offset fields named after the requisite fields in the original structure.
// However, there are some deviations:
// 1. String - The offsets target the Python string object structure.
//    - Owing to the varying representation of strings across versions, which depends on encoding and interning,
//      the field names don't match those of a specific structure. Here, `data` is the offset pointing to the string's
//      first character, while `size` indicates the offset to the 32-bit integer that denotes the string's byte length
//      (not the character count).
// 2. PyRuntimeState.interp_main - This aligns with the offset of (_PyRuntimeState, interpreters.main).
// 3. PyThreadState.thread_id - In certain Python versions, the underlying field is referred to as "thread"
//    rather than "thread_id".
// Complete set of structure offsets for one Python version, plus the version the
// offsets were generated for. One instance per supported version is loaded from
// user space.
typedef struct {
    u32 major_version;
    u32 minor_version;
    u32 patch_version;

    PyObject py_object;
    PyString py_string;
    PyTypeObject py_type_object;
    PyThreadState py_thread_state;
    PyCFrame py_cframe;
    PyInterpreterState py_interpreter_state;
    PyRuntimeState py_runtime_state;
    PyFrameObject py_frame_object;
    PyCodeObject py_code_object;
    PyTupleObject py_tuple_object;
} PythonVersionOffsets;

// Per-process information supplied by user space for every profiled PID.
typedef struct {
    // u64 start_time;

    // u64 constant_buffer_addr;
    u64 interpreter_addr;   // address of the interpreter state in the target process
    u64 thread_state_addr;  // address of the thread state in the target process

    // TODO(kakkoyun): Try to obtain information in the runtime.
    // u64 tls_key_addr;          // virtual address of autoTLSkey for pthreads TLS
    // u64 gil_locked_addr;       // virtual address of gil_locked
    // u64 gil_last_holder_addr;  // virtual address of gil_last_holder

    // Key into the version-specific offsets map; user space encodes it as
    // major * 100 + minor.
    u32 py_version;
} ProcessInfo;

// Reason a sample could not be collected (ERROR_NONE on success).
enum error_code {
    ERROR_NONE = 0,

    ERROR_MISSING_PYSTATE = 1,
    ERROR_THREAD_STATE_NULL = 2,
    ERROR_INTERPRETER_NULL = 3,

    ERROR_TOO_MANY_THREADS = 4,
    ERROR_THREAD_STATE_NOT_FOUND = 5,
    ERROR_EMPTY_STACK = 6,

    // ERROR_FRAME_CODE_IS_NULL = 7,
    ERROR_BAD_FSBASE = 8,
    ERROR_INVALID_PTHREADS_IMPL = 9,
    ERROR_THREAD_STATE_HEAD_NULL = 10,
    ERROR_BAD_THREAD_STATE = 11,
    ERROR_CALL_FAILED = 12,  // set when the tail call into the stack walker fails
    ERROR_TSTATE_CFRAME_IS_NULL = 13,
};

// Outcome of the stack walk for a sample.
enum stack_status {
    STACK_COMPLETE = 0,   // the end of the frame chain was reached
    STACK_TRUNCATED = 1,  // the walk ran out of program invocations first
    STACK_ERROR = 2,
};

// enum gil_state {
//     GIL_STATE_NO_INFO = 0,
//     GIL_STATE_ERROR = 1,
//     GIL_STATE_UNINITIALIZED = 2,
//     GIL_STATE_NOT_LOCKED = 3,
//     GIL_STATE_THIS_THREAD = 4,
//     GIL_STATE_GLOBAL_CURRENT_THREAD = 5,
//     GIL_STATE_OTHER_THREAD = 6,
//     GIL_STATE_NULL = 7,
// };

// NOTE(review): not referenced by the code visible next to this header; presumably
// the result of comparing this thread's state with the global current thread — confirm.
enum thread_state {
    THREAD_STATE_UNKNOWN = 0,
    THREAD_STATE_MATCH = 1,
    THREAD_STATE_MISMATCH = 2,
    THREAD_STATE_THIS_THREAD_NULL = 3,
    THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL = 4,
    THREAD_STATE_BOTH_NULL = 5,
};

// enum pthread_id_match {
//     PTHREAD_ID_UNKNOWN = 0,
//     PTHREAD_ID_MATCH = 1,
//     PTHREAD_ID_MISMATCH = 2,
//     PTHREAD_ID_THREAD_STATE_NULL = 3,
//     PTHREAD_ID_NULL = 4,
//     PTHREAD_ID_ERROR = 5,
// };

// Fixed buffer sizes (in bytes) for the strings carried in samples and symbols.
#define COMM_LEN 16
#define CLASS_NAME_LEN 32
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128

// A resolved Python frame: source file, class name, function name and line number.
typedef struct {
    char file[FILE_NAME_LEN];
    char class[CLASS_NAME_LEN];
    char func[FUNCTION_NAME_LEN];
    u32 line;
} Symbol;

// TODO(kakkoyun): Any useful fields could be moved to the Stack?
// struct event {
//   u32 stack_len;
//   s32 stack[STACK_MAX_LEN];

//   u64 user_ip;
//   u64 user_sp;

//   u32 user_stack_len;
//   uint8_t raw_user_stack[__USER_STACKS_PAGES__ * PAGE_SIZE];
// #define FRAME_CODE_IS_NULL 0x80000001
// };

// Python stack of a sample: `len` valid entries in `frames`, each entry being a
// symbol id (not the Symbol itself).
typedef struct {
    s16 len;
    u32 frames[STACK_MAX_LEN];
} Stack;

// NOTE(review): presumably identifies the native (user/kernel) stack traces that
// belong to a sample — confirm against the BPF program that fills it in.
typedef struct {
    pid_t pid;
    pid_t tid;
    int user_stack_id;
    int kernel_stack_id;
} stack_count_key_t;

// One profiling sample as sent from the BPF programs to user space.
typedef struct {
    u64 timestamp;
    u32 cpu;
    pid_t pid;
    pid_t tid;
    u8 comm[COMM_LEN];

    stack_count_key_t native_stack_count_key;

    enum stack_status stack_status;
    enum error_code error_code;

    // TODO(kakkoyun): Clean up
    // bool thread_current;
    // enum gil_state gil_state;
    // bool pthread_match;
    // enum pthread_id_match pthread_id_match;

    // TODO(kakkoyun): Shall we utilize this?
    // Stack related!
    // long long int size;
    // long long int expected_size;

    // int has_meta;
    // int metadata;
    // char dummy_safeguard;

    Stack stack;
} Sample;

// Scratch state carried across the chained (tail-called) BPF program invocations
// while one sample is being built.
typedef struct {
    ProcessInfo process_info;

    // void *interpreter;
    void *thread_state;

    // u64 current_thread_id;
    // int thread_state_prog_call_count;

    // TODO(kakkoyun): Unify naming.
    // TODO(kakkoyun): FrameData? FrameInfo?
    // u64 base_stack; // TODO(kakkoyun): Where to find it? sp?
    // u64 cfp;
    // u64 sp;
    // u64 pc;
    // u64 bp;
    void *frame_ptr;                   // next Python frame the stack walker will read
    int stack_walker_prog_call_count;  // how many times the stack walker has run for this sample

    Sample sample;
} State;
py_perf::py_perf::PyPerf; 22 | 23 | #[derive(ValueEnum, Copy, Clone, Debug)] 24 | enum OutputType { 25 | Pprof, 26 | Flamegraph, 27 | Folded, 28 | } 29 | 30 | #[derive(Parser, Debug)] 31 | struct InfoSubcommand {} 32 | 33 | #[derive(Parser, Debug)] 34 | struct RecordSubcommand { 35 | /// Python process IDs to profile. 36 | #[clap(short, long)] 37 | pid: i32, 38 | /// Profiling duration to use. 39 | #[clap(short, long, default_value = "10s")] 40 | duration: Option, 41 | /// The frequency at which profiling data is collected. e.g., 19 samples per second. 42 | #[clap(long, short = 'q', default_value = "19")] 43 | frequency: Option, 44 | /// The output format to use. 45 | /// Valid values are: `pprof`, `flamegraph` and `folded`. 46 | /// The default value is `pprof`. 47 | #[clap(short, long, default_value = "pprof")] 48 | format: Option, 49 | } 50 | 51 | #[derive(clap::Subcommand, Debug)] 52 | enum Command { 53 | /// Record profiles from a running process. 54 | Record(RecordSubcommand), 55 | /// Print information about host. 
56 | Info(InfoSubcommand), 57 | } 58 | 59 | #[derive(Parser, Debug)] 60 | #[clap(version, about, long_about = None)] 61 | struct Arguments { 62 | #[clap(subcommand)] 63 | subcmd: Command, 64 | } 65 | 66 | fn main() { 67 | if !arch::is_x86() { 68 | error!("py-perf only supports x86/x86_64 architectures for now."); 69 | } 70 | 71 | let env = Env::default().default_filter_or("info"); 72 | env_logger::Builder::from_env(env) 73 | .format_timestamp_nanos() 74 | .init(); 75 | debug!("debug mode enabled!"); 76 | 77 | if let Err(err) = run() { 78 | error!("error: {}", err); 79 | for (i, suberror) in err.chain().enumerate() { 80 | if i > 0 { 81 | error!("cause: {}", suberror); 82 | } 83 | } 84 | std::process::exit(1); 85 | } 86 | } 87 | 88 | fn ctrlc_channel() -> Result, Error> { 89 | let (sender, receiver) = unbounded(); 90 | ctrlc::set_handler(move || { 91 | trace!("signal handler is called"); 92 | sender.send(()).expect("could not send signal on channel."); 93 | })?; 94 | 95 | Ok(receiver) 96 | } 97 | 98 | fn run() -> Result<()> { 99 | let args = Arguments::parse(); 100 | match args.subcmd { 101 | Command::Info(_) => { 102 | if !Uid::current().is_root() { 103 | return Err(anyhow!( 104 | "py-perf requires root to load and run BPF programs" 105 | )); 106 | } 107 | 108 | let info = info()?; 109 | println!("System info"); 110 | println!("-----------"); 111 | println!("Kernel release: {}", info.system.os_release); 112 | println!("DebugFS mounted: {}", info.system.debug_fs); 113 | println!(); 114 | } 115 | 116 | Command::Record(record) => { 117 | if !Uid::current().is_root() { 118 | return Err(anyhow!( 119 | "py-perf requires root to load and run BPF programs" 120 | )); 121 | } 122 | 123 | let mut py_perf = PyPerf::new( 124 | Duration::from_millis(u64::try_from(record.duration.unwrap().as_millis())?), 125 | record.frequency.unwrap(), 126 | )?; 127 | 128 | if record.pid == 0 { 129 | error!("at least one PID must be given"); 130 | exit(1); 131 | } 132 | 133 | 
py_perf.record(record.pid)?; 134 | info!("py-perf is started!"); 135 | let profile = py_perf.start(&ctrlc_channel().unwrap())?; 136 | info!("py-perf is stopped!"); 137 | 138 | let now: DateTime = Utc::now(); 139 | let name_suffix = now.format("%m%d%Y_%Hh%Mm%Ss"); 140 | 141 | let report = profile.report()?; 142 | match record.format.unwrap() { 143 | OutputType::Pprof => { 144 | let path = format!("py-perf_{name_suffix}_profile.pb"); 145 | let f = File::create(&path).unwrap(); 146 | report.pprof(f)? 147 | } 148 | OutputType::Flamegraph => { 149 | let path = format!("py-perf_{name_suffix}_flamegraph.svg"); 150 | let f = File::create(&path).unwrap(); 151 | report.flamegraph(f)? 152 | } 153 | OutputType::Folded => { 154 | let path = format!("py-perf_{name_suffix}_folded.txt"); 155 | let f = File::create(&path).unwrap(); 156 | report.folded(f)? 157 | } 158 | }; 159 | info!("done!"); 160 | } 161 | } 162 | 163 | Ok(()) 164 | } 165 | 166 | pub struct SystemInfo { 167 | pub os_release: String, 168 | pub debug_fs: bool, 169 | } 170 | 171 | pub struct Info { 172 | pub system: SystemInfo, 173 | } 174 | 175 | /// Returns information about the host system. 176 | /// This function is used by the `info` subcommand. 177 | /// It is also used by the `record` subcommand to check for root privileges. 178 | /// # Errors 179 | /// 180 | /// Returns an error if the kernel release cannot be determined. 
181 | pub fn info() -> Result { 182 | Ok(Info { 183 | system: SystemInfo { 184 | os_release: uname()?.release().to_string_lossy().to_string(), 185 | debug_fs: File::open("/sys/kernel/debug/").is_ok(), 186 | }, 187 | }) 188 | } 189 | -------------------------------------------------------------------------------- /src/perf_event.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 The rbperf authors 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | use std::os::raw::{c_int, c_ulong}; 7 | 8 | use anyhow::{anyhow, Result}; 9 | use errno::errno; 10 | use libc::{self, pid_t}; 11 | 12 | use perf_event_open_sys as sys; 13 | use perf_event_open_sys::bindings::{perf_event_attr, PERF_FLAG_FD_CLOEXEC}; 14 | 15 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 16 | unsafe fn perf_event_open( 17 | attrs: *mut perf_event_attr, 18 | pid: pid_t, 19 | cpu: c_int, 20 | group_fd: c_int, 21 | flags: c_ulong, 22 | ) -> c_int { 23 | sys::perf_event_open(attrs, pid, cpu, group_fd, flags) as c_int 24 | } 25 | 26 | /// # Safety 27 | pub unsafe fn setup(cpu: i32, frequency: u64, pid: Option) -> Result { 28 | let mut attrs = perf_event_open_sys::bindings::perf_event_attr { 29 | size: u32::try_from(std::mem::size_of::())?, 30 | type_: sys::bindings::PERF_TYPE_SOFTWARE, 31 | config: u64::from(sys::bindings::PERF_COUNT_SW_CPU_CLOCK), 32 | ..Default::default() 33 | }; 34 | let sample_period = u64::pow(10, 9) / frequency; 35 | attrs.__bindgen_anon_1.sample_period = sample_period; 36 | attrs.__bindgen_anon_1.sample_freq = frequency; 37 | attrs.set_disabled(1); 38 | 39 | let pid = pid.unwrap_or(-1); 40 | 41 | let fd = perf_event_open( 42 | &mut attrs, 43 | pid, /* pid */ 44 | cpu, /* cpu */ 45 | -1, /* group_fd */ 46 | u64::from(PERF_FLAG_FD_CLOEXEC), /* flags */ 47 | ); 48 | 49 | if fd < 0 { 50 | return Err(anyhow!("setup_perf_event 
failed with errno {}", errno())); 51 | } 52 | 53 | Ok(fd) 54 | } 55 | -------------------------------------------------------------------------------- /src/process_info.rs: -------------------------------------------------------------------------------- 1 | use log::info; 2 | use std::fmt; 3 | 4 | use anyhow::{Context, Result}; 5 | 6 | // use py_spy::python_interpreters::{InterpreterState, ThreadState}; 7 | use py_spy::python_process_info::{ 8 | get_interpreter_address, get_python_version, get_threadstate_address, PythonProcessInfo, 9 | }; 10 | use py_spy::version::Version; 11 | use remoteprocess::{Pid, Process}; 12 | 13 | pub struct ProcessInfo { 14 | pub pid: Pid, 15 | pub process: Process, 16 | 17 | pub version: Version, 18 | pub version_string: String, 19 | 20 | pub python_info: PythonProcessInfo, 21 | pub interpreter_address: u64, 22 | pub thread_state_address: u64, 23 | } 24 | 25 | impl fmt::Display for ProcessInfo { 26 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 27 | writeln!(f, "pid: {}", self.pid)?; 28 | writeln!(f, "python version: \n\t{:?}", self.version)?; 29 | writeln!(f, "\tstring: {}", self.version_string)?; 30 | 31 | writeln!(f, "python interpreter info:")?; 32 | writeln!( 33 | f, 34 | "\tfilename: {}", 35 | self.python_info.python_filename.display() 36 | )?; 37 | writeln!(f, "\tdockerized: {}", self.python_info.dockerized)?; 38 | writeln!(f, "\tinterpreter address: 0x{:x}", self.interpreter_address)?; 39 | writeln!( 40 | f, 41 | "\tthreadstate address: 0x{:x}", 42 | self.thread_state_address 43 | )?; 44 | 45 | Ok(()) 46 | } 47 | } 48 | 49 | impl ProcessInfo { 50 | pub fn new(pid: Pid) -> Result { 51 | let process = 52 | Process::new(pid).context("failed to open process: check if it is running.")?; 53 | 54 | let python_info = PythonProcessInfo::new(&process)?; 55 | 56 | let version = get_python_version(&python_info, &process)?; 57 | info!("python version {} detected", version); 58 | 59 | let interpreter_address = 
get_interpreter_address(&python_info, &process, &version)?; 60 | info!("found interpreter at 0x{:016x}", interpreter_address); 61 | 62 | let thread_state_address = get_threadstate_address(&python_info, &version, false)?; 63 | info!("found thread state at 0x{:016x}", thread_state_address); 64 | 65 | let version_string = format!("python{}.{}", version.major, version.minor); 66 | 67 | Ok(Self { 68 | pid, 69 | process, 70 | version, 71 | version_string, 72 | python_info, 73 | interpreter_address: interpreter_address as u64, 74 | thread_state_address: thread_state_address as u64, 75 | }) 76 | } 77 | 78 | pub fn children(self) -> Result, anyhow::Error> { 79 | let mut children = Vec::new(); 80 | 81 | for (child_pid, _) in self.process.child_processes()? { 82 | children.push(Self::new(child_pid)?); 83 | } 84 | 85 | Ok(children) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/profile.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Write; 2 | use std::fs; 3 | use std::time::{Duration, SystemTime}; 4 | use std::{collections::HashMap, path::Path}; 5 | 6 | use anyhow::Result; 7 | use pprof::protos; 8 | use pprof::protos::Message; 9 | use pprof::timer::ReportTiming; 10 | use pprof::{Frames, Symbol}; 11 | 12 | pub struct Report { 13 | pub data: HashMap, 14 | pub timing: ReportTiming, 15 | } 16 | 17 | impl Report { 18 | pub fn pprof(&self, mut writer: W) -> Result<()> 19 | where 20 | W: std::io::Write, 21 | { 22 | let report = pprof::Report { 23 | data: self.data.clone(), 24 | timing: self.timing.clone(), 25 | }; 26 | let profile: protos::Profile = report.pprof().unwrap(); 27 | 28 | let mut content = Vec::new(); 29 | profile.write_to_vec(&mut content).unwrap(); 30 | writer.write_all(&content).unwrap(); 31 | println!("report: {:?}", &report); 32 | Ok(()) 33 | } 34 | 35 | pub fn flamegraph(&self, writer: W) -> Result<()> 36 | where 37 | W: std::io::Write, 38 | { 39 | let 
report = pprof::Report { 40 | data: self.data.clone(), 41 | timing: self.timing.clone(), 42 | }; 43 | report.flamegraph(writer).unwrap(); 44 | println!("report: {:?}", &report); 45 | Ok(()) 46 | } 47 | 48 | pub fn folded(&self, mut writer: W) -> Result<()> 49 | where 50 | W: std::io::Write, 51 | { 52 | let report = pprof::Report { 53 | data: self.data.clone(), 54 | timing: self.timing.clone(), 55 | }; 56 | 57 | let lines: Vec = self 58 | .data 59 | .iter() 60 | .map(|(key, value)| { 61 | let mut line = key.thread_name_or_id(); 62 | line.push(';'); 63 | 64 | for frame in key.frames.iter().rev() { 65 | for symbol in frame.iter().rev() { 66 | write!(&mut line, "{};", symbol).unwrap(); 67 | } 68 | } 69 | 70 | line.pop().unwrap_or_default(); 71 | write!(&mut line, " {}", value).unwrap(); 72 | 73 | line 74 | }) 75 | .collect(); 76 | if !lines.is_empty() { 77 | writer.write_all(lines.join("\n").as_bytes()).unwrap(); 78 | } 79 | println!("report: {:?}", &report); 80 | Ok(()) 81 | } 82 | } 83 | 84 | #[derive(Debug, Default)] 85 | pub struct Profile { 86 | pub start_time: Option, 87 | 88 | duration: Duration, 89 | frequency: u64, 90 | 91 | // frames: Vec, 92 | thread_id_to_frames: HashMap, 93 | // thread_id_to_name: HashMap, 94 | data: HashMap, 95 | // From rbperf: 96 | // #[serde(skip)] 97 | // symbol_id_map: HashMap, 98 | // symbols: Vec, 99 | // samples: Vec, 100 | } 101 | 102 | impl Profile { 103 | pub fn new(duration: Duration, frequency: u64) -> Self { 104 | Self { 105 | start_time: None, 106 | duration, 107 | frequency, 108 | thread_id_to_frames: HashMap::new(), 109 | data: HashMap::new(), 110 | } 111 | } 112 | 113 | pub fn add_sample( 114 | &mut self, 115 | thread_id: u64, 116 | timestamp: SystemTime, 117 | sample: Vec, 118 | weight: isize, 119 | ) { 120 | let frames = self 121 | .thread_id_to_frames 122 | .entry(thread_id) 123 | .or_insert_with(|| Frames { 124 | frames: vec![sample], 125 | thread_name: get_thread_name(thread_id), 126 | thread_id, 127 | 
sample_timestamp: timestamp, 128 | }); 129 | *self.data.entry(frames.clone()).or_insert(weight) += weight; 130 | } 131 | 132 | pub fn report(&self) -> Result { 133 | Ok(Report { 134 | data: self.data.clone(), 135 | timing: ReportTiming { 136 | frequency: i32::try_from(self.frequency)?, 137 | start_time: self.start_time.unwrap(), 138 | duration: self.duration, 139 | }, 140 | }) 141 | } 142 | } 143 | 144 | fn get_thread_name(tid: u64) -> String { 145 | let path_str = format!("/proc/{tid}/comm"); 146 | let path = Path::new(&path_str); 147 | 148 | match fs::read_to_string(path) { 149 | Ok(name) => name.trim().to_string(), 150 | Err(_) => format!("Thread {tid}"), 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/py_perf.rs: -------------------------------------------------------------------------------- 1 | use log::{debug, error, info, trace}; 2 | use pprof::Symbol; 3 | 4 | use std::collections::HashMap; 5 | use std::os::fd::{AsFd, AsRawFd}; 6 | use std::path::PathBuf; 7 | use std::sync::{Arc, RwLock}; 8 | use std::thread::ScopedJoinHandle; 9 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; 10 | use std::{fmt, thread}; 11 | 12 | use libbpf_rs::skel::{OpenSkel, SkelBuilder}; 13 | use libbpf_rs::{MapFlags, PerfBufferBuilder, ProgramType}; 14 | 15 | use anyhow::{bail, Context, Result}; 16 | use crossbeam::channel::{bounded, select, tick, unbounded, Receiver}; 17 | use plain::Plain; 18 | use py_spy::version::Version; 19 | use serde_yaml; 20 | 21 | use crate::bindings; 22 | use crate::bindings::{PythonVersionOffsets, PYPERF_STACK_WALKING_PROGRAM_IDX}; 23 | use crate::bpf::pyperf::{PyperfSkel, PyperfSkelBuilder}; 24 | use crate::perf_event; 25 | use crate::process_info::ProcessInfo; 26 | use crate::profile::Profile; 27 | use crate::python_readers::any_as_u8_slice; 28 | use crate::python_versions::PYTHON_VERSION_CONFIGS_YAML; 29 | 30 | // TODO(kakkoyun): Matches this with error codes in the pyperf.h !! 
/// Counters describing how sample collection went.
#[derive(Default, Clone, Debug)]
pub struct Stats {
    pub total_events: u32,
    // Events discarded due to the kernel buffer being full.
    pub lost_event_errors: u32,
    // Failed to retrieve sample due to a failed read from a map.
    pub map_reading_errors: u32,
    // The stack is not complete.
    pub truncated_stacks: u32,
    // How many times have we bumped into garbled data.
    pub garbled_data_errors: u32,
}

impl Stats {
    /// Sum of every error counter.
    ///
    /// Saturates at `u32::MAX` instead of overflowing (which would panic in debug
    /// builds and wrap around in release builds).
    #[must_use]
    pub const fn total_errors(&self) -> u32 {
        self.lost_event_errors.saturating_add(self.stack_errors())
    }

    /// Sum of the error counters that concern stack collection (everything except
    /// lost events). Saturates at `u32::MAX`.
    #[must_use]
    pub const fn stack_errors(&self) -> u32 {
        self.map_reading_errors
            .saturating_add(self.truncated_stacks)
            .saturating_add(self.garbled_data_errors)
    }
}

impl fmt::Display for Stats {
    /// Writes a leading blank line followed by one `name: value` line per counter.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f)?;
        writeln!(f, "total events: {}", self.total_events)?;
        writeln!(f, "total errors: {}", self.total_errors())?;
        writeln!(f, "lost event errors: {}", self.lost_event_errors)?;
        writeln!(f, "map reading errors: {}", self.map_reading_errors)?;
        writeln!(f, "truncated stacks: {}", self.truncated_stacks)?;
        writeln!(f, "garbled data errors: {}", self.garbled_data_errors)?;

        Ok(())
    }
}
90 | /// 91 | /// # Errors 92 | /// This function will return an error if the `PYTHON_VERSION_CONFIGS_YAML` is not valid YAML. 93 | pub fn new() -> Result { 94 | let versions = Self::read_supported_version_offsets()?; 95 | Ok(Self { versions }) 96 | } 97 | 98 | #[must_use] 99 | pub fn get(&self, version: &Version) -> Option<&SupportedVersion> { 100 | let version_string = format!("python{}.{}", version.major, version.minor); 101 | self.versions.get(&version_string) 102 | } 103 | 104 | #[must_use] 105 | pub fn version_string(version: &Version) -> String { 106 | format!("python{}.{}", version.major, version.minor) 107 | } 108 | 109 | fn read_supported_version_offsets() -> Result> { 110 | let mut supported_python_versions: HashMap = HashMap::new(); 111 | for (i, python_version_config_yaml) in PYTHON_VERSION_CONFIGS_YAML.iter().enumerate() { 112 | let python_version_config: PythonVersionOffsets = 113 | serde_yaml::from_str(python_version_config_yaml)?; 114 | let v = Version { 115 | major: u64::from(python_version_config.major_version), 116 | minor: u64::from(python_version_config.minor_version), 117 | patch: u64::from(python_version_config.patch_version), 118 | // TODO(kakkoyun): Add release flags to the config file. 119 | release_flags: String::new(), 120 | }; 121 | let version_string = Self::version_string(&v); 122 | supported_python_versions.insert( 123 | version_string, 124 | SupportedVersion { 125 | idx: u32::try_from(i)?, 126 | version: v.clone(), 127 | offsets: python_version_config, 128 | }, 129 | ); 130 | } 131 | Ok(supported_python_versions) 132 | } 133 | } 134 | 135 | // TODO(kakkoyun): Consider renaming to profiler. 136 | pub struct PyPerf<'a> { 137 | // TODO(kakkoyun): It's better to have a local version of this and return it in start(profile). 
138 | pub stats: Arc>, 139 | 140 | frequency: u64, 141 | duration: Duration, 142 | started_at: Option, 143 | 144 | supported_versions: SupportedVersions, 145 | processes: Vec, 146 | 147 | bpf: PyperfSkel<'a>, 148 | } 149 | 150 | impl<'a> PyPerf<'a> { 151 | /// Returns a new instance of `PyPerf`. 152 | /// This function will open and load the BPF module. 153 | /// It will also populate the `process_info_map` with the given `pids`. 154 | /// 155 | /// # Errors 156 | /// This function will return an error if the BPF module fails to load. 157 | /// It will also return an error if the `process_info_map` fails to update. 158 | pub fn new(duration: Duration, frequency: u64) -> Result> { 159 | // Open and load the BPF module. 160 | let mut skel_builder = PyperfSkelBuilder::default(); 161 | skel_builder.obj_builder.debug(true); 162 | 163 | let mut open_skel = skel_builder.open()?; 164 | 165 | debug!("verbose_bpf_logging set to {}", true); 166 | open_skel.rodata().verbose = true; 167 | 168 | for prog in open_skel.obj.progs_iter_mut() { 169 | prog.set_prog_type(ProgramType::PerfEvent); 170 | } 171 | 172 | let bpf = open_skel.load()?; 173 | for prog in bpf.obj.progs_iter() { 174 | debug!( 175 | "open prog: {} has {} instructions", 176 | prog.name(), 177 | prog.insn_cnt() 178 | ); 179 | } 180 | 181 | let supported_versions = SupportedVersions::new()?; 182 | Ok(PyPerf { 183 | frequency, 184 | duration, 185 | 186 | started_at: None, 187 | supported_versions, 188 | 189 | bpf, 190 | processes: Vec::new(), 191 | stats: Arc::new(RwLock::new(Stats::default())), 192 | }) 193 | } 194 | 195 | // TODO(kakkoyun): Rename to register? 196 | /// Start recording the samples for the given `pids`. 197 | /// 198 | /// # Errors 199 | /// This function will return an error if it fails to send the `pids` to the BPF space. 
200 | pub fn record(&mut self, pid: i32) -> Result<()> { 201 | let process_info = 202 | ProcessInfo::new(pid).context(format!("failed to fetch process info: {pid}"))?; 203 | 204 | debug!("python process: \n{}", process_info); 205 | self.processes.push(process_info); 206 | 207 | // let children = process_info.children()?; 208 | // for child in children { 209 | // debug!("python process: \n{}", child); 210 | // self.processes.push(child); 211 | // } 212 | 213 | if self.processes.is_empty() { 214 | bail!("No Python processes found to profile!"); 215 | } 216 | info!("found python processes: {}", self.processes.len()); 217 | 218 | // let bpf = self.bpf.clone(); 219 | // let mut bpf = bpf.write().unwrap(); 220 | let mut maps = self.bpf.maps_mut(); 221 | for proc in &self.processes { 222 | let offsets = match self.supported_versions.get(&proc.version) { 223 | Some(supported_version) => supported_version.offsets, 224 | None => bail!(format!("unsupported Python version: {}", proc.version)), 225 | }; 226 | 227 | let py_version = u32::try_from(proc.version.major * 100 + proc.version.minor)?; 228 | let key = py_version.to_le_bytes(); 229 | // let value = unsafe { any_as_u8_slice(&offsets) }; 230 | let value = unsafe { plain::as_bytes(&offsets) }; 231 | maps.version_specific_offsets() 232 | .update(&key, value, MapFlags::ANY) 233 | .context("failed to update version specific offsets map")?; 234 | 235 | let key = proc.pid.to_le_bytes(); 236 | let bpf_proc_info = crate::bindings::ProcessInfo { 237 | thread_state_addr: proc.thread_state_address, 238 | interpreter_addr: proc.interpreter_address, 239 | py_version, 240 | }; 241 | let value = unsafe { any_as_u8_slice(&bpf_proc_info) }; 242 | maps.pid_to_process_info() 243 | .update(&key, value, MapFlags::ANY) 244 | .context("failed to update process info map")?; 245 | } 246 | Ok(()) 247 | } 248 | 249 | // TODO(kakkoyun): Rename to profile? 250 | /// Start the profiler. 251 | /// This function will block until the profiler is stopped. 
252 | /// The profiler can be stopped by sending a message to the `stop_channel_rx` channel. 253 | /// 254 | /// # Errors 255 | /// This function will return an error if the profiler fails to start. 256 | /// 257 | /// # Panics 258 | /// This function will panic if the profiler fails to attach the perf event. 259 | pub fn start(&mut self, stop_channel_rx: &Receiver<()>) -> Result { 260 | if self.processes.is_empty() { 261 | bail!("No Python processes found to profile!"); 262 | } 263 | info!("starting profiler"); 264 | 265 | let pid = self.processes[0].pid; 266 | let mut fds = Vec::new(); 267 | for i in 0..num_cpus::get() { 268 | // TODO(kakkoyun): Support multiple processes if there exists. 269 | let perf_fd = unsafe { perf_event::setup(i.try_into()?, self.frequency, Some(pid)) }?; 270 | fds.push(perf_fd); 271 | } 272 | 273 | // let bpf = self.bpf.clone(); 274 | // let mut bpf = bpf.write().unwrap(); 275 | let mut links = Vec::new(); 276 | for fd in fds { 277 | let prog = self.bpf.obj.prog_mut("on_event").unwrap(); 278 | let link = prog.attach_perf_event(fd)?; 279 | links.push(link); 280 | } 281 | 282 | for prog in self.bpf.obj.progs_iter_mut() { 283 | debug!( 284 | "program type: {}, name: {}, flags: {}, section: {}", 285 | prog.prog_type(), 286 | prog.name(), 287 | prog.flags(), 288 | prog.section() 289 | ); 290 | } 291 | 292 | // Insert stack walking program. 
293 | let idx: i32 = PYPERF_STACK_WALKING_PROGRAM_IDX.try_into().unwrap(); 294 | let val = self 295 | .bpf 296 | .obj 297 | .prog("walk_python_stack") 298 | .unwrap() 299 | .as_fd() 300 | .as_raw_fd(); 301 | 302 | // let bpf = self.bpf.clone(); 303 | // let mut bpf = bpf.write().unwrap(); 304 | let mut maps = self.bpf.maps_mut(); 305 | let programs = maps.programs(); 306 | programs 307 | .update(&idx.to_le_bytes(), &val.to_le_bytes(), MapFlags::ANY) 308 | .unwrap(); 309 | 310 | debug!( 311 | "profiling duration: {}, frequency: {}", 312 | humantime::format_duration(self.duration), 313 | self.frequency 314 | ); 315 | 316 | let (sender, receiver) = unbounded(); 317 | 318 | let maps = self.bpf.maps(); 319 | let events = maps.events(); 320 | let stats = self.stats.clone(); 321 | // let cb_sender = sender.clone(); 322 | let perf_buffer = PerfBufferBuilder::new(events) 323 | .sample_cb(|cpu: i32, data: &[u8]| { 324 | trace!("received sample from cpu: {}", cpu); 325 | sender 326 | .send((cpu, data.to_vec())) 327 | .expect("could not send signal on channel."); 328 | }) 329 | .lost_cb(|cpu: i32, count: u64| { 330 | trace!("lost {} events on CPU {}", count, cpu); 331 | handle_lost_events(stats.clone(), cpu, count); 332 | }) 333 | .build()?; 334 | 335 | // TODO(kakkoyun): Enable ringbuffer 336 | // let ring_buffer = libbpf_rs::RingBufferBuilder::new() 337 | // .add(events, |data: &[u8]| -> i32 { 338 | // handle_event(0, data, &self.stats.clone()); 339 | // 0 340 | // }) 341 | // .unwrap() 342 | // .build(); 343 | 344 | self.started_at = Some(SystemTime::now()); 345 | info!("profiler started recording..."); 346 | 347 | let (done, stop) = bounded::<()>(1); 348 | let profile = thread::scope(|s| { 349 | let duration = self.duration; 350 | 351 | let processor: ScopedJoinHandle = s.spawn({ 352 | let duration = self.duration; 353 | let frequency = self.frequency; 354 | let started_at = self.started_at; 355 | let receiver: Receiver<(i32, Vec)> = receiver.clone(); 356 | 357 | move || { 
358 | let mut profile = Profile::new(duration, frequency); 359 | profile.start_time = started_at; 360 | 361 | loop { 362 | select! { 363 | recv(receiver) -> sample => match sample { 364 | Ok((cpu, data)) => { 365 | trace!("received sample from cpu: {}", cpu); 366 | let mut sample = bindings::Sample::default(); 367 | plain::copy_from_bytes(&mut sample, &data[..]) 368 | .expect("data buffer was too short"); 369 | self.handle_sample(self.stats.clone(), &mut profile, cpu, sample); 370 | trace!("sample handled! Waiting for the next one..."); 371 | } 372 | Err(_) => continue, 373 | }, 374 | recv(stop) -> _ => { 375 | debug!("stopping profiling..."); 376 | break; 377 | } 378 | } 379 | } 380 | debug!("sample processor is done!"); 381 | profile 382 | } 383 | }); 384 | 385 | // let sender = sender.clone(); 386 | let ticks = tick(duration); 387 | loop { 388 | select! { 389 | recv(ticks) -> _ => { 390 | debug!("TICK!"); 391 | if let Err(err) = perf_buffer.poll(Duration::from_millis(100)) { 392 | debug!("polling perf buffer failed with {:?}", err); 393 | } 394 | } 395 | recv(stop_channel_rx) -> _ => { 396 | debug!("stopping profiling..."); 397 | drop(done); 398 | break; 399 | } 400 | } 401 | } 402 | debug!("profiling is stopped"); 403 | processor.join().unwrap() 404 | }); 405 | debug!("profiler is done!"); 406 | 407 | let stats = stats.read().unwrap(); 408 | info!("stats: {}", stats); 409 | 410 | Ok(profile) 411 | } 412 | 413 | // TODO(kakkoyun): Probably better than redundant data that we have. 
414 | // fn populate_python_version_map( 415 | // supported_versions: &HashMap, 416 | // versions: &mut libbpf_rs::Map, 417 | // ) -> Result<()> { 418 | // use crate::python_readers::any_as_u8_slice; 419 | // for (version, i) in supported_versions.iter() { 420 | // let key: u32 = i.clone(); 421 | // let value = unsafe { any_as_u8_slice(&version.offsets) }; 422 | // versions.update(&key.to_le_bytes(), value, MapFlags::ANY)?; 423 | // } 424 | // Ok(()) 425 | // } 426 | 427 | fn handle_sample( 428 | &self, 429 | stats: Arc>, 430 | profile: &mut Profile, 431 | cpu: i32, 432 | raw_sample: bindings::Sample, 433 | ) { 434 | let stats = stats.clone(); 435 | 436 | let maps = self.bpf.maps(); 437 | let symbols = maps.symbols(); 438 | let mut id_to_symbol = HashMap::new(); 439 | for stack_bytes in symbols.keys() { 440 | match symbols.lookup(&stack_bytes, MapFlags::ANY) { 441 | Ok(Some(id_bytes)) => { 442 | let mut symbol = bindings::Symbol::default(); 443 | plain::copy_from_bytes(&mut symbol, &stack_bytes) 444 | .expect("data buffer was too short"); 445 | let id = u32::from_le_bytes(id_bytes.try_into().expect("parse frame id bytes")); 446 | id_to_symbol.insert(id, symbol); 447 | } 448 | _ => continue, 449 | } 450 | } 451 | stats.write().unwrap().total_events += 1; 452 | 453 | let now = now_formatted(); 454 | 455 | // TODO(kakkoyun): Check this could be used as thread_name! 456 | let comm_str = std::str::from_utf8(&raw_sample.comm) 457 | .unwrap() 458 | .trim_end_matches(char::from(0)); 459 | // NOTICE: It's similar to str_from_u8_nul 460 | 461 | // if recv_stack.stack_status == ruby_stack_status_STACK_INCOMPLETE { 462 | // error!("truncated stack"); 463 | // self.stats.truncated_stacks += 1; 464 | // continue; 465 | // } 466 | 467 | // TODO(kakkoyun): Record as metric. 
468 | assert!(raw_sample.pid != 0, "pid is zero, this should never happen"); 469 | 470 | debug!( 471 | "cpu: {} received: {:9} pid: {:6} tid: {:<6} comm: {:<16} kernel: {} user: {}", 472 | cpu, 473 | now, 474 | raw_sample.pid, 475 | raw_sample.tid, 476 | comm_str, 477 | raw_sample.native_stack_count_key.kernel_stack_id, 478 | raw_sample.native_stack_count_key.user_stack_id 479 | ); 480 | 481 | // let timestamp = UNIX_EPOCH + Duration::from_nanos(sample.timestamp); 482 | let timestamp = UNIX_EPOCH + Duration::from_secs(raw_sample.timestamp); 483 | 484 | // TODO(kakkoyun): Handle native stack! 485 | 486 | let stack = raw_sample.stack; 487 | let mut read_frame_count = 0; 488 | let mut frames: Vec<(String, String, String, u32)> = Vec::new(); 489 | for symbol_id in &stack.frames { 490 | // Don't read past the last frame. 491 | if read_frame_count >= stack.len { 492 | break; 493 | } 494 | 495 | match id_to_symbol.get(symbol_id) { 496 | Some(symbol) => { 497 | let file_bytes: Vec = symbol.file.iter().map(|&c| c as u8).collect(); 498 | let file_name = unsafe { str_from_u8_nul(&file_bytes) }; 499 | if file_name.is_err() { 500 | stats.write().unwrap().garbled_data_errors += 1; 501 | continue; 502 | } 503 | let file_name = file_name 504 | .expect("file name should be valid unicode") 505 | .to_string(); 506 | 507 | let class_bytes: Vec = symbol.class.iter().map(|&c| c as u8).collect(); 508 | let class_name = unsafe { str_from_u8_nul(&class_bytes) }; 509 | if class_name.is_err() { 510 | stats.write().unwrap().garbled_data_errors += 1; 511 | continue; 512 | } 513 | let class_name = class_name 514 | .expect("class name should be valid unicode") 515 | .to_string(); 516 | 517 | let func_bytes: Vec = symbol.func.iter().map(|&c| c as u8).collect(); 518 | let func_name = unsafe { str_from_u8_nul(&func_bytes) }; 519 | if func_name.is_err() { 520 | stats.write().unwrap().garbled_data_errors += 1; 521 | continue; 522 | } 523 | let func_name = func_name 524 | .expect("function name should 
be valid unicode") 525 | .to_string(); 526 | 527 | let line = symbol.line; 528 | 529 | frames.push((file_name, class_name, func_name, line)); 530 | read_frame_count += 1; 531 | } 532 | None => { 533 | stats.write().unwrap().map_reading_errors += 1; 534 | } 535 | } 536 | } 537 | 538 | // TODO(kakkoyun): Utilize weight. Aggregate in BPF and send.} 539 | let mut sample = Vec::new(); 540 | for (file_name, class_name, func_name, line) in frames { 541 | // trace!( 542 | // "file: {:<32} class: {:<32} func: {:<32} line: {:<4}", 543 | // file_name, 544 | // class_name, 545 | // func_name, 546 | // line 547 | // ); 548 | sample.push(Symbol { 549 | name: Some(format!("{}::{}", class_name, func_name).into_bytes()), 550 | addr: None, 551 | lineno: Some(line), 552 | filename: Some(PathBuf::from(file_name)), 553 | }); 554 | } 555 | profile.add_sample(raw_sample.tid as u64, timestamp, sample, 1) 556 | } 557 | } 558 | 559 | unsafe impl Plain for bindings::Sample {} 560 | unsafe impl Plain for bindings::Symbol {} 561 | 562 | fn handle_lost_events(stats: Arc>, cpu: i32, count: u64) { 563 | stats.write().unwrap().lost_event_errors += u32::try_from(count).unwrap(); 564 | error!("lost {} events on CPU {}", count, cpu); 565 | } 566 | 567 | // unsafe impl Plain for pyperf_bss_types::event {} 568 | 569 | // TODO(kakkoyun): Clean this up. 
570 | // fn handle_event(cpu: i32, data: &[u8], stats: &Arc>) { 571 | // stats.write().unwrap().total_events += 1; 572 | 573 | // let mut event = pyperf_bss_types::event::default(); 574 | // plain::copy_from_bytes(&mut event, data).expect("data buffer was too short"); 575 | 576 | // let now = now_formatted(); 577 | // let comm_str = std::str::from_utf8(&event.comm) 578 | // .unwrap() 579 | // .trim_end_matches(char::from(0)); 580 | // let msg_str = std::str::from_utf8(&event.msg) 581 | // .unwrap() 582 | // .trim_end_matches(char::from(0)); 583 | 584 | // info!( 585 | // "cpu: {} received: {:9} pid: {:6} tid: {:<6} uid: {:<6} comm: {:<16} msg: {:<256} kernel: {} user: {}", 586 | // cpu, now, event.pid, event.tid, event.uid, comm_str, msg_str, event.kernel_stack_id, event.user_stack_id 587 | // ); 588 | // } 589 | 590 | fn now_formatted() -> String { 591 | use time::macros::format_description; 592 | use time::OffsetDateTime; 593 | 594 | OffsetDateTime::now_local().map_or_else( 595 | |_| "00:00:00".to_string(), 596 | |now| { 597 | let format = format_description!("[hour]:[minute]:[second]"); 598 | now.format(&format) 599 | .unwrap_or_else(|_| "00:00:00".to_string()) 600 | }, 601 | ) 602 | } 603 | 604 | // TODO(kakkoyun): Can we find an alternative? 605 | use std::str::Utf8Error; 606 | 607 | pub unsafe fn str_from_u8_nul(utf8_src: &[u8]) -> Result<&str, Utf8Error> { 608 | let nul_range_end = utf8_src 609 | .iter() 610 | .position(|&c| c == b'\0') 611 | .unwrap_or(utf8_src.len()); // default to length if no `\0` present 612 | ::std::str::from_utf8(&utf8_src[0..nul_range_end]) 613 | } 614 | -------------------------------------------------------------------------------- /src/python_readers.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 The rbperf authors 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | // TODO(kakkoyun): Check py-spy! 7 | // TODO(kakkoyun): Do we really need this? 8 | // TODO(kakkoyun): Can we use plain instead? 9 | 10 | pub const unsafe fn any_as_u8_slice(p: &T) -> &[u8] { 11 | ::std::slice::from_raw_parts((p as *const T).cast::(), ::std::mem::size_of::()) 12 | } 13 | -------------------------------------------------------------------------------- /src/python_versions/mod.rs: -------------------------------------------------------------------------------- 1 | pub const PYTHON_VERSION_CONFIGS_YAML: &[&str] = &[ 2 | include_str!("python_2_7_15.yaml"), 3 | include_str!("python_3_3_7.yaml"), 4 | include_str!("python_3_5_5.yaml"), 5 | include_str!("python_3_6_6.yaml"), 6 | include_str!("python_3_7_0.yaml"), 7 | include_str!("python_3_8_0.yaml"), 8 | include_str!("python_3_9_5.yaml"), 9 | include_str!("python_3_10_0.yaml"), 10 | include_str!("python_3_11_0.yaml"), 11 | ]; 12 | -------------------------------------------------------------------------------- /src/python_versions/python_2_7_15.yaml: -------------------------------------------------------------------------------- 1 | major_version: 2 2 | minor_version: 7 3 | patch_version: 15 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 36 8 | size: 16 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 0 13 | interp: 8 14 | frame: 16 15 | thread_id: 144 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | f_lineno: 124 28 | f_localsplus: 376 29 | py_code_object: 30 | co_filename: 80 31 | co_name: 88 32 | co_varnames: 56 33 | co_firstlineno: 96 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /src/python_versions/python_3_10_0.yaml: -------------------------------------------------------------------------------- 1 | 
major_version: 3 2 | minor_version: 10 3 | patch_version: 0 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: -1 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 8 13 | interp: 16 14 | frame: 24 15 | thread_id: 176 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | f_lineno: 100 28 | f_localsplus: 352 29 | py_code_object: 30 | co_filename: 104 31 | co_name: 112 32 | co_varnames: 72 33 | co_firstlineno: 40 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /src/python_versions/python_3_11_0.yaml: -------------------------------------------------------------------------------- 1 | major_version: 3 2 | minor_version: 11 3 | patch_version: 0 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: -1 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 8 13 | interp: 16 14 | frame: -1 15 | thread_id: 152 16 | native_thread_id: 160 17 | cframe: 56 18 | py_cframe: 19 | current_frame: 8 20 | py_interpreter_state: 21 | tstate_head: 16 22 | py_runtime_state: 23 | interp_main: 48 24 | py_frame_object: 25 | f_back: 48 26 | f_code: 32 27 | f_lineno: -1 28 | f_localsplus: 72 29 | py_code_object: 30 | co_filename: 112 31 | co_name: 120 32 | co_varnames: 96 33 | co_firstlineno: 72 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /src/python_versions/python_3_3_7.yaml: -------------------------------------------------------------------------------- 1 | major_version: 3 2 | minor_version: 3 3 | patch_version: 7 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: 16 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 0 13 | interp: 8 14 | frame: 16 15 | 
thread_id: 144 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | f_lineno: 124 28 | f_localsplus: 376 29 | py_code_object: 30 | co_filename: 96 31 | co_name: 104 32 | co_varnames: 64 33 | co_firstlineno: 112 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /src/python_versions/python_3_5_5.yaml: -------------------------------------------------------------------------------- 1 | major_version: 3 2 | minor_version: 5 3 | patch_version: 5 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: 16 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 8 13 | interp: 16 14 | frame: 24 15 | thread_id: 152 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | f_lineno: 124 28 | f_localsplus: 376 29 | py_code_object: 30 | co_filename: 96 31 | co_name: 104 32 | co_varnames: 64 33 | co_firstlineno: 112 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /src/python_versions/python_3_6_6.yaml: -------------------------------------------------------------------------------- 1 | major_version: 3 2 | minor_version: 6 3 | patch_version: 6 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: 16 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 8 13 | interp: 16 14 | frame: 24 15 | thread_id: 152 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | 
f_lineno: 124 28 | f_localsplus: 376 29 | py_code_object: 30 | co_filename: 96 31 | co_name: 104 32 | co_varnames: 64 33 | co_firstlineno: 36 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /src/python_versions/python_3_7_0.yaml: -------------------------------------------------------------------------------- 1 | major_version: 3 2 | minor_version: 7 3 | patch_version: 0 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: 16 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 8 13 | interp: 16 14 | frame: 24 15 | thread_id: 176 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | f_lineno: 108 28 | f_localsplus: 360 29 | py_code_object: 30 | co_filename: 96 31 | co_name: 104 32 | co_varnames: 64 33 | co_firstlineno: 36 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /src/python_versions/python_3_8_0.yaml: -------------------------------------------------------------------------------- 1 | major_version: 3 2 | minor_version: 8 3 | patch_version: 0 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: 16 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 8 13 | interp: 16 14 | frame: 24 15 | thread_id: 176 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | f_lineno: 108 28 | f_localsplus: 360 29 | py_code_object: 30 | co_filename: 104 31 | co_name: 112 32 | co_varnames: 72 33 | co_firstlineno: 40 34 | py_tuple_object: 35 | ob_item: 24 36 | 
-------------------------------------------------------------------------------- /src/python_versions/python_3_9_5.yaml: -------------------------------------------------------------------------------- 1 | major_version: 3 2 | minor_version: 9 3 | patch_version: 5 4 | py_object: 5 | ob_type: 8 6 | py_string: 7 | data: 48 8 | size: 16 9 | py_type_object: 10 | tp_name: 24 11 | py_thread_state: 12 | next: 8 13 | interp: 16 14 | frame: 24 15 | thread_id: 176 16 | native_thread_id: -1 17 | cframe: -1 18 | py_cframe: 19 | current_frame: 0 20 | py_interpreter_state: 21 | tstate_head: 8 22 | py_runtime_state: 23 | interp_main: -1 24 | py_frame_object: 25 | f_back: 24 26 | f_code: 32 27 | f_lineno: 108 28 | f_localsplus: 360 29 | py_code_object: 30 | co_filename: 104 31 | co_name: 112 32 | co_varnames: 72 33 | co_firstlineno: 40 34 | py_tuple_object: 35 | ob_item: 24 36 | -------------------------------------------------------------------------------- /xtask/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xtask" 3 | version = "0.1.0" 4 | edition = "2021" 5 | description = "A helper task to generate memory offsets for py-perf to use against CPython." 6 | repository = "https://github.com/kakkoyun/py-perf" 7 | authors = ["Kemal Akkoyun "] 8 | keywords = ["bpf", "ebpf", "python", "CPython", "profiler"] 9 | license = "Apache-2.0" 10 | readme = "README.md" 11 | categories = ["development-tools", "profiling"] 12 | 13 | [dependencies] 14 | py-perf = { path = ".." } 15 | # TODO(kakkoyun): Send a patch to upstream. 
16 | py-spy = { path = "../../../Sandbox/Profilers/py-spy" } 17 | # py-spy = { git = "ssh://git@github.com/kakkoyun/py-spy.git" } 18 | memoffset = "0.9" 19 | serde_yaml = "0.9" 20 | -------------------------------------------------------------------------------- /xtask/README.md: -------------------------------------------------------------------------------- 1 | # xtasks 2 | -------------------------------------------------------------------------------- /xtask/src/main.rs: -------------------------------------------------------------------------------- 1 | use memoffset::offset_of; 2 | use std::fs::File; 3 | use std::io::Write; 4 | use std::mem::size_of; 5 | use std::path::Path; 6 | 7 | use py_perf::bindings::PythonVersionOffsets; 8 | 9 | static OUT_DIR: &str = "src/python_versions"; 10 | 11 | fn write_to_file(filename: &str, contents: PythonVersionOffsets) { 12 | let yaml = serde_yaml::to_string(&contents).unwrap(); 13 | 14 | File::create(Path::new(OUT_DIR).join(filename)) 15 | .unwrap() 16 | .write_all(yaml.as_bytes()) 17 | .unwrap(); 18 | } 19 | 20 | fn dump_python_structs_2_7_15() { 21 | let python_2_7_15_offsets = PythonVersionOffsets { 22 | major_version: 2, 23 | minor_version: 7, 24 | patch_version: 15, 25 | py_object: py_perf::bindings::PyObject { 26 | ob_type: offset_of!(py_spy::python_bindings::v2_7_15::PyObject, ob_type) as i64, 27 | }, 28 | py_string: py_perf::bindings::PyString { 29 | data: offset_of!(py_spy::python_bindings::v2_7_15::PyStringObject, ob_sval) as i64, 30 | size: offset_of!(py_spy::python_bindings::v2_7_15::PyVarObject, ob_size) as i64, 31 | }, 32 | py_type_object: py_perf::bindings::PyTypeObject { 33 | tp_name: offset_of!(py_spy::python_bindings::v2_7_15::PyTypeObject, tp_name) as i64, 34 | }, 35 | py_thread_state: py_perf::bindings::PyThreadState { 36 | interp: offset_of!(py_spy::python_bindings::v2_7_15::PyThreadState, interp) as i64, 37 | next: offset_of!(py_spy::python_bindings::v2_7_15::PyThreadState, next) as i64, 38 | frame: 
offset_of!(py_spy::python_bindings::v2_7_15::PyThreadState, frame) as i64, 39 | thread_id: offset_of!(py_spy::python_bindings::v2_7_15::PyThreadState, thread_id) 40 | as i64, 41 | native_thread_id: -1, 42 | cframe: -1, 43 | }, 44 | py_cframe: py_perf::bindings::PyCFrame::default(), 45 | py_interpreter_state: py_perf::bindings::PyInterpreterState { 46 | tstate_head: offset_of!( 47 | py_spy::python_bindings::v2_7_15::PyInterpreterState, 48 | tstate_head 49 | ) as i64, 50 | }, 51 | py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 }, 52 | py_frame_object: py_perf::bindings::PyFrameObject { 53 | f_back: offset_of!(py_spy::python_bindings::v2_7_15::PyFrameObject, f_back) as i64, 54 | f_code: offset_of!(py_spy::python_bindings::v2_7_15::PyFrameObject, f_code) as i64, 55 | f_lineno: offset_of!(py_spy::python_bindings::v2_7_15::PyFrameObject, f_lineno) as i64, 56 | f_localsplus: offset_of!( 57 | py_spy::python_bindings::v2_7_15::PyFrameObject, 58 | f_localsplus 59 | ) as i64, 60 | }, 61 | py_code_object: py_perf::bindings::PyCodeObject { 62 | co_filename: offset_of!(py_spy::python_bindings::v2_7_15::PyCodeObject, co_filename) 63 | as i64, 64 | co_name: offset_of!(py_spy::python_bindings::v2_7_15::PyCodeObject, co_name) as i64, 65 | co_varnames: offset_of!(py_spy::python_bindings::v2_7_15::PyCodeObject, co_varnames) 66 | as i64, 67 | co_firstlineno: offset_of!( 68 | py_spy::python_bindings::v2_7_15::PyCodeObject, 69 | co_firstlineno 70 | ) as i64, 71 | }, 72 | py_tuple_object: py_perf::bindings::PyTupleObject { 73 | ob_item: offset_of!(py_spy::python_bindings::v2_7_15::PyTupleObject, ob_item) as i64, 74 | }, 75 | }; 76 | 77 | write_to_file("python_2_7_15.yaml", python_2_7_15_offsets) 78 | } 79 | 80 | fn dump_python_structs_3_3_7() { 81 | let python_3_3_7_offsets = PythonVersionOffsets { 82 | major_version: 3, 83 | minor_version: 3, 84 | patch_version: 7, 85 | py_object: py_perf::bindings::PyObject { 86 | ob_type: 
offset_of!(py_spy::python_bindings::v3_3_7::PyObject, ob_type) as i64, 87 | }, 88 | py_string: py_perf::bindings::PyString { 89 | data: size_of::() as i64, 90 | size: offset_of!(py_spy::python_bindings::v3_3_7::PyVarObject, ob_size) as i64, 91 | }, 92 | py_type_object: py_perf::bindings::PyTypeObject { 93 | tp_name: offset_of!(py_spy::python_bindings::v3_3_7::PyTypeObject, tp_name) as i64, 94 | }, 95 | py_thread_state: py_perf::bindings::PyThreadState { 96 | interp: offset_of!(py_spy::python_bindings::v3_3_7::PyThreadState, interp) as i64, 97 | next: offset_of!(py_spy::python_bindings::v3_3_7::PyThreadState, next) as i64, 98 | frame: offset_of!(py_spy::python_bindings::v3_3_7::PyThreadState, frame) as i64, 99 | thread_id: offset_of!(py_spy::python_bindings::v3_3_7::PyThreadState, thread_id) as i64, 100 | native_thread_id: -1, 101 | cframe: -1, 102 | }, 103 | py_cframe: py_perf::bindings::PyCFrame::default(), 104 | py_interpreter_state: py_perf::bindings::PyInterpreterState { 105 | tstate_head: offset_of!( 106 | py_spy::python_bindings::v3_3_7::PyInterpreterState, 107 | tstate_head 108 | ) as i64, 109 | }, 110 | py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 }, 111 | py_frame_object: py_perf::bindings::PyFrameObject { 112 | f_back: offset_of!(py_spy::python_bindings::v3_3_7::PyFrameObject, f_back) as i64, 113 | f_code: offset_of!(py_spy::python_bindings::v3_3_7::PyFrameObject, f_code) as i64, 114 | f_lineno: offset_of!(py_spy::python_bindings::v3_3_7::PyFrameObject, f_lineno) as i64, 115 | f_localsplus: offset_of!(py_spy::python_bindings::v3_3_7::PyFrameObject, f_localsplus) 116 | as i64, 117 | }, 118 | py_code_object: py_perf::bindings::PyCodeObject { 119 | co_filename: offset_of!(py_spy::python_bindings::v3_3_7::PyCodeObject, co_filename) 120 | as i64, 121 | co_name: offset_of!(py_spy::python_bindings::v3_3_7::PyCodeObject, co_name) as i64, 122 | co_varnames: offset_of!(py_spy::python_bindings::v3_3_7::PyCodeObject, co_varnames) 123 | as 
i64, 124 | co_firstlineno: offset_of!( 125 | py_spy::python_bindings::v3_3_7::PyCodeObject, 126 | co_firstlineno 127 | ) as i64, 128 | }, 129 | py_tuple_object: py_perf::bindings::PyTupleObject { 130 | ob_item: offset_of!(py_spy::python_bindings::v3_3_7::PyTupleObject, ob_item) as i64, 131 | }, 132 | }; 133 | 134 | write_to_file("python_3_3_7.yaml", python_3_3_7_offsets) 135 | } 136 | 137 | fn dump_python_structs_3_5_5() { 138 | let python_3_5_5_offsets = PythonVersionOffsets { 139 | major_version: 3, 140 | minor_version: 5, 141 | patch_version: 5, 142 | py_object: py_perf::bindings::PyObject { 143 | ob_type: offset_of!(py_spy::python_bindings::v3_5_5::PyObject, ob_type) as i64, 144 | }, 145 | py_string: py_perf::bindings::PyString { 146 | data: size_of::() as i64, 147 | size: offset_of!(py_spy::python_bindings::v3_5_5::PyVarObject, ob_size) as i64, 148 | }, 149 | py_type_object: py_perf::bindings::PyTypeObject { 150 | tp_name: offset_of!(py_spy::python_bindings::v3_5_5::PyTypeObject, tp_name) as i64, 151 | }, 152 | py_thread_state: py_perf::bindings::PyThreadState { 153 | interp: offset_of!(py_spy::python_bindings::v3_5_5::PyThreadState, interp) as i64, 154 | next: offset_of!(py_spy::python_bindings::v3_5_5::PyThreadState, next) as i64, 155 | frame: offset_of!(py_spy::python_bindings::v3_5_5::PyThreadState, frame) as i64, 156 | thread_id: offset_of!(py_spy::python_bindings::v3_5_5::PyThreadState, thread_id) as i64, 157 | native_thread_id: -1, 158 | cframe: -1, 159 | }, 160 | py_cframe: py_perf::bindings::PyCFrame::default(), 161 | py_interpreter_state: py_perf::bindings::PyInterpreterState { 162 | tstate_head: offset_of!( 163 | py_spy::python_bindings::v3_5_5::PyInterpreterState, 164 | tstate_head 165 | ) as i64, 166 | }, 167 | py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 }, 168 | py_frame_object: py_perf::bindings::PyFrameObject { 169 | f_back: offset_of!(py_spy::python_bindings::v3_5_5::PyFrameObject, f_back) as i64, 170 | f_code: 
offset_of!(py_spy::python_bindings::v3_5_5::PyFrameObject, f_code) as i64,
            f_lineno: offset_of!(py_spy::python_bindings::v3_5_5::PyFrameObject, f_lineno) as i64,
            f_localsplus: offset_of!(py_spy::python_bindings::v3_5_5::PyFrameObject, f_localsplus)
                as i64,
        },
        py_code_object: py_perf::bindings::PyCodeObject {
            co_filename: offset_of!(py_spy::python_bindings::v3_5_5::PyCodeObject, co_filename)
                as i64,
            co_name: offset_of!(py_spy::python_bindings::v3_5_5::PyCodeObject, co_name) as i64,
            co_varnames: offset_of!(py_spy::python_bindings::v3_5_5::PyCodeObject, co_varnames)
                as i64,
            co_firstlineno: offset_of!(
                py_spy::python_bindings::v3_5_5::PyCodeObject,
                co_firstlineno
            ) as i64,
        },
        py_tuple_object: py_perf::bindings::PyTupleObject {
            ob_item: offset_of!(py_spy::python_bindings::v3_5_5::PyTupleObject, ob_item) as i64,
        },
    };

    write_to_file("python_3_5_5.yaml", python_3_5_5_offsets)
}

/// Builds the struct-field offset table for CPython 3.6.6 and passes it to
/// `write_to_file` under the name `python_3_6_6.yaml`.
///
/// Offsets are taken with `offset_of!` from the `py_spy::python_bindings::v3_6_6`
/// bindings. `native_thread_id`, `cframe` and `interp_main` are written as `-1`
/// (presumably "not available" for the consumer — confirm), and `py_cframe` is
/// left at its `Default`.
fn dump_python_structs_3_6_6() {
    // Name the interpreter version once so individual fields cannot drift apart.
    use py_spy::python_bindings::v3_6_6 as cpython;

    let python_3_6_6_offsets = PythonVersionOffsets {
        major_version: 3,
        minor_version: 6,
        patch_version: 6,
        py_object: py_perf::bindings::PyObject {
            ob_type: offset_of!(cpython::PyObject, ob_type) as i64,
        },
        py_string: py_perf::bindings::PyString {
            // NOTE(review): the type argument was missing (`size_of::()` does not compile).
            // `PyASCIIObject` is assumed: string data is expected to start right after that
            // header — confirm against upstream.
            data: size_of::<cpython::PyASCIIObject>() as i64,
            size: offset_of!(cpython::PyVarObject, ob_size) as i64,
        },
        py_type_object: py_perf::bindings::PyTypeObject {
            tp_name: offset_of!(cpython::PyTypeObject, tp_name) as i64,
        },
        py_thread_state: py_perf::bindings::PyThreadState {
            interp: offset_of!(cpython::PyThreadState, interp) as i64,
            next: offset_of!(cpython::PyThreadState, next) as i64,
            frame: offset_of!(cpython::PyThreadState, frame) as i64,
            thread_id: offset_of!(cpython::PyThreadState, thread_id) as i64,
            native_thread_id: -1,
            cframe: -1,
        },
        py_cframe: py_perf::bindings::PyCFrame::default(),
        py_interpreter_state: py_perf::bindings::PyInterpreterState {
            tstate_head: offset_of!(cpython::PyInterpreterState, tstate_head) as i64,
        },
        py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 },
        py_frame_object: py_perf::bindings::PyFrameObject {
            f_back: offset_of!(cpython::PyFrameObject, f_back) as i64,
            f_code: offset_of!(cpython::PyFrameObject, f_code) as i64,
            f_lineno: offset_of!(cpython::PyFrameObject, f_lineno) as i64,
            f_localsplus: offset_of!(cpython::PyFrameObject, f_localsplus) as i64,
        },
        py_code_object: py_perf::bindings::PyCodeObject {
            co_filename: offset_of!(cpython::PyCodeObject, co_filename) as i64,
            co_name: offset_of!(cpython::PyCodeObject, co_name) as i64,
            co_varnames: offset_of!(cpython::PyCodeObject, co_varnames) as i64,
            co_firstlineno: offset_of!(cpython::PyCodeObject, co_firstlineno) as i64,
        },
        py_tuple_object: py_perf::bindings::PyTupleObject {
            ob_item: offset_of!(cpython::PyTupleObject, ob_item) as i64,
        },
    };

    write_to_file("python_3_6_6.yaml", python_3_6_6_offsets)
}

/// Builds the struct-field offset table for CPython 3.7.0 and passes it to
/// `write_to_file` under the name `python_3_7_0.yaml`.
///
/// Offsets are taken with `offset_of!` from the `py_spy::python_bindings::v3_7_0`
/// bindings. `native_thread_id`, `cframe` and `interp_main` are written as `-1`
/// (presumably "not available" for the consumer — confirm), and `py_cframe` is
/// left at its `Default`.
fn dump_python_structs_3_7_0() {
    // Name the interpreter version once so individual fields cannot drift apart.
    use py_spy::python_bindings::v3_7_0 as cpython;

    let python_3_7_0_offsets = PythonVersionOffsets {
        major_version: 3,
        minor_version: 7,
        patch_version: 0,
        py_object: py_perf::bindings::PyObject {
            ob_type: offset_of!(cpython::PyObject, ob_type) as i64,
        },
        py_string: py_perf::bindings::PyString {
            // NOTE(review): the type argument was missing (`size_of::()` does not compile).
            // `PyASCIIObject` is assumed: string data is expected to start right after that
            // header — confirm against upstream.
            data: size_of::<cpython::PyASCIIObject>() as i64,
            size: offset_of!(cpython::PyVarObject, ob_size) as i64,
        },
        py_type_object: py_perf::bindings::PyTypeObject {
            tp_name: offset_of!(cpython::PyTypeObject, tp_name) as i64,
        },
        py_thread_state: py_perf::bindings::PyThreadState {
            interp: offset_of!(cpython::PyThreadState, interp) as i64,
            next: offset_of!(cpython::PyThreadState, next) as i64,
            frame: offset_of!(cpython::PyThreadState, frame) as i64,
            thread_id: offset_of!(cpython::PyThreadState, thread_id) as i64,
            native_thread_id: -1,
            cframe: -1,
        },
        py_cframe: py_perf::bindings::PyCFrame::default(),
        py_interpreter_state: py_perf::bindings::PyInterpreterState {
            tstate_head: offset_of!(cpython::PyInterpreterState, tstate_head) as i64,
        },
        py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 },
        py_frame_object: py_perf::bindings::PyFrameObject {
            f_back: offset_of!(cpython::PyFrameObject, f_back) as i64,
            f_code: offset_of!(cpython::PyFrameObject, f_code) as i64,
            f_lineno: offset_of!(cpython::PyFrameObject, f_lineno) as i64,
            f_localsplus: offset_of!(cpython::PyFrameObject, f_localsplus) as i64,
        },
        py_code_object: py_perf::bindings::PyCodeObject {
            co_filename: offset_of!(cpython::PyCodeObject, co_filename) as i64,
            co_name: offset_of!(cpython::PyCodeObject, co_name) as i64,
            co_varnames: offset_of!(cpython::PyCodeObject, co_varnames) as i64,
            co_firstlineno: offset_of!(cpython::PyCodeObject, co_firstlineno) as i64,
        },
        py_tuple_object: py_perf::bindings::PyTupleObject {
            ob_item: offset_of!(cpython::PyTupleObject, ob_item) as i64,
        },
    };

    write_to_file("python_3_7_0.yaml", python_3_7_0_offsets)
}

/// Builds the struct-field offset table for CPython 3.8.0 and passes it to
/// `write_to_file` under the name `python_3_8_0.yaml`.
///
/// Offsets are taken with `offset_of!` from the `py_spy::python_bindings::v3_8_0`
/// bindings. `native_thread_id`, `cframe` and `interp_main` are written as `-1`
/// (presumably "not available" for the consumer — confirm), and `py_cframe` is
/// left at its `Default`.
fn dump_python_structs_3_8_0() {
    // Name the interpreter version once so individual fields cannot drift apart.
    use py_spy::python_bindings::v3_8_0 as cpython;

    let python_3_8_0_offsets = PythonVersionOffsets {
        major_version: 3,
        minor_version: 8,
        patch_version: 0,
        py_object: py_perf::bindings::PyObject {
            ob_type: offset_of!(cpython::PyObject, ob_type) as i64,
        },
        py_string: py_perf::bindings::PyString {
            // NOTE(review): the type argument was missing (`size_of::()` does not compile).
            // `PyASCIIObject` is assumed: string data is expected to start right after that
            // header — confirm against upstream.
            data: size_of::<cpython::PyASCIIObject>() as i64,
            size: offset_of!(cpython::PyVarObject, ob_size) as i64,
        },
        py_type_object: py_perf::bindings::PyTypeObject {
            tp_name: offset_of!(cpython::PyTypeObject, tp_name) as i64,
        },
        py_thread_state: py_perf::bindings::PyThreadState {
            interp: offset_of!(cpython::PyThreadState, interp) as i64,
            next: offset_of!(cpython::PyThreadState, next) as i64,
            frame: offset_of!(cpython::PyThreadState, frame) as i64,
            thread_id: offset_of!(cpython::PyThreadState, thread_id) as i64,
            native_thread_id: -1,
            cframe: -1,
        },
        py_cframe: py_perf::bindings::PyCFrame::default(),
        py_interpreter_state: py_perf::bindings::PyInterpreterState {
            tstate_head: offset_of!(cpython::PyInterpreterState, tstate_head) as i64,
        },
        py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 },
        py_frame_object: py_perf::bindings::PyFrameObject {
            f_back: offset_of!(cpython::PyFrameObject, f_back) as i64,
            f_code: offset_of!(cpython::PyFrameObject, f_code) as i64,
            f_lineno: offset_of!(cpython::PyFrameObject, f_lineno) as i64,
            f_localsplus: offset_of!(cpython::PyFrameObject, f_localsplus) as i64,
        },
        py_code_object: py_perf::bindings::PyCodeObject {
            co_filename: offset_of!(cpython::PyCodeObject, co_filename) as i64,
            co_name: offset_of!(cpython::PyCodeObject, co_name) as i64,
            co_varnames: offset_of!(cpython::PyCodeObject, co_varnames) as i64,
            co_firstlineno: offset_of!(cpython::PyCodeObject, co_firstlineno) as i64,
        },
        py_tuple_object: py_perf::bindings::PyTupleObject {
            ob_item: offset_of!(cpython::PyTupleObject, ob_item) as i64,
        },
    };

    write_to_file("python_3_8_0.yaml", python_3_8_0_offsets)
}

/// Builds the struct-field offset table for CPython 3.9.5 and passes it to
/// `write_to_file` under the name `python_3_9_5.yaml`.
///
/// Offsets are taken with `offset_of!` from the `py_spy::python_bindings::v3_9_5`
/// bindings. `native_thread_id`, `cframe` and `interp_main` are written as `-1`
/// (presumably "not available" for the consumer — confirm), and `py_cframe` is
/// left at its `Default`.
fn dump_python_structs_3_9_5() {
    // Name the interpreter version once so individual fields cannot drift apart.
    use py_spy::python_bindings::v3_9_5 as cpython;

    let python_3_9_5_offsets = PythonVersionOffsets {
        major_version: 3,
        minor_version: 9,
        patch_version: 5,
        py_object: py_perf::bindings::PyObject {
            ob_type: offset_of!(cpython::PyObject, ob_type) as i64,
        },
        py_string: py_perf::bindings::PyString {
            // NOTE(review): the type argument was missing (`size_of::()` does not compile).
            // `PyASCIIObject` is assumed: string data is expected to start right after that
            // header — confirm against upstream.
            data: size_of::<cpython::PyASCIIObject>() as i64,
            size: offset_of!(cpython::PyVarObject, ob_size) as i64,
        },
        py_type_object: py_perf::bindings::PyTypeObject {
            tp_name: offset_of!(cpython::PyTypeObject, tp_name) as i64,
        },
        py_thread_state: py_perf::bindings::PyThreadState {
            interp: offset_of!(cpython::PyThreadState, interp) as i64,
            next: offset_of!(cpython::PyThreadState, next) as i64,
            frame: offset_of!(cpython::PyThreadState, frame) as i64,
            thread_id: offset_of!(cpython::PyThreadState, thread_id) as i64,
            native_thread_id: -1,
            cframe: -1,
        },
        py_cframe: py_perf::bindings::PyCFrame::default(),
        py_interpreter_state: py_perf::bindings::PyInterpreterState {
            tstate_head: offset_of!(cpython::PyInterpreterState, tstate_head) as i64,
        },
        py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 },
        py_frame_object: py_perf::bindings::PyFrameObject {
            f_back: offset_of!(cpython::PyFrameObject, f_back) as i64,
            f_code: offset_of!(cpython::PyFrameObject, f_code) as i64,
            f_lineno: offset_of!(cpython::PyFrameObject, f_lineno) as i64,
            f_localsplus: offset_of!(cpython::PyFrameObject, f_localsplus) as i64,
        },
        py_code_object: py_perf::bindings::PyCodeObject {
            co_filename: offset_of!(cpython::PyCodeObject, co_filename) as i64,
            co_name: offset_of!(cpython::PyCodeObject, co_name) as i64,
            co_varnames: offset_of!(cpython::PyCodeObject, co_varnames) as i64,
            co_firstlineno: offset_of!(cpython::PyCodeObject, co_firstlineno) as i64,
        },
        py_tuple_object: py_perf::bindings::PyTupleObject {
            ob_item: offset_of!(cpython::PyTupleObject, ob_item) as i64,
        },
    };

    write_to_file("python_3_9_5.yaml", python_3_9_5_offsets)
}

/// Builds the struct-field offset table for CPython 3.10.0 and passes it to
/// `write_to_file` under the name `python_3_10_0.yaml`.
///
/// Offsets are taken with `offset_of!` from the `py_spy::python_bindings::v3_10_0`
/// bindings. Here `py_string.size` is also written as `-1`, alongside
/// `native_thread_id`, `cframe` and `interp_main`; `py_cframe` is left at its
/// `Default`.
fn dump_python_structs_3_10_0() {
    // Name the interpreter version once so individual fields cannot drift apart.
    use py_spy::python_bindings::v3_10_0 as cpython;

    let python_3_10_0_offsets = PythonVersionOffsets {
        major_version: 3,
        minor_version: 10,
        patch_version: 0,
        py_object: py_perf::bindings::PyObject {
            ob_type: offset_of!(cpython::PyObject, ob_type) as i64,
        },
        py_string: py_perf::bindings::PyString {
            // see https://github.com/python/cpython/blob/3.10/Include/cpython/unicodeobject.h#L82-L84
            // NOTE(review): the type argument was missing (`size_of::()` does not compile).
            // `PyASCIIObject` is assumed from the header referenced above — confirm against
            // upstream.
            data: size_of::<cpython::PyASCIIObject>() as i64,
            size: -1,
        },
        py_type_object: py_perf::bindings::PyTypeObject {
            tp_name: offset_of!(cpython::PyTypeObject, tp_name) as i64,
        },
        py_thread_state: py_perf::bindings::PyThreadState {
            interp: offset_of!(cpython::PyThreadState, interp) as i64,
            next: offset_of!(cpython::PyThreadState, next) as i64,
            frame: offset_of!(cpython::PyThreadState, frame) as i64,
            thread_id: offset_of!(cpython::PyThreadState, thread_id) as i64,
            native_thread_id: -1,
            cframe: -1,
        },
        py_cframe: py_perf::bindings::PyCFrame::default(),
        py_interpreter_state: py_perf::bindings::PyInterpreterState {
            tstate_head: offset_of!(cpython::PyInterpreterState, tstate_head) as i64,
        },
        py_runtime_state: py_perf::bindings::PyRuntimeState { interp_main: -1 },
        py_frame_object: py_perf::bindings::PyFrameObject {
            f_back: offset_of!(cpython::PyFrameObject, f_back) as i64,
            f_code: offset_of!(cpython::PyFrameObject, f_code) as i64,
            f_lineno: offset_of!(cpython::PyFrameObject, f_lineno) as i64,
            f_localsplus: offset_of!(cpython::PyFrameObject, f_localsplus) as i64,
        },
        py_code_object: py_perf::bindings::PyCodeObject {
            co_filename: offset_of!(cpython::PyCodeObject, co_filename) as i64,
            co_name: offset_of!(cpython::PyCodeObject, co_name) as i64,
            co_varnames: offset_of!(cpython::PyCodeObject, co_varnames) as i64,
            co_firstlineno: offset_of!(cpython::PyCodeObject, co_firstlineno) as i64,
        },
        py_tuple_object: py_perf::bindings::PyTupleObject {
            ob_item: offset_of!(cpython::PyTupleObject, ob_item) as i64,
        },
    };

    write_to_file("python_3_10_0.yaml", python_3_10_0_offsets)
}

/// Builds the struct-field offset table for CPython 3.11.0 and passes it to
/// `write_to_file` under the name `python_3_11_0.yaml`.
///
/// Differences from the older versions handled in this file, as visible in the
/// initializer below:
/// - `py_thread_state.frame` is `-1`; `cframe` and `py_cframe.current_frame` are
///   recorded instead, together with `native_thread_id`.
/// - `tstate_head` and `interp_main` are sums of an outer-struct offset and the
///   offset of a field inside the nested struct.
/// - Frame fields are read from `_PyInterpreterFrame` (`previous`, `f_code`,
///   `localsplus`) and `f_lineno` is `-1`.
/// - `co_varnames` is filled from `co_localsplusnames`.
fn dump_python_structs_3_11_0() {
    // Name the interpreter version once so individual fields cannot drift apart.
    use py_spy::python_bindings::v3_11_0 as cpython;

    let python_3_11_0_offsets = PythonVersionOffsets {
        major_version: 3,
        minor_version: 11,
        patch_version: 0,
        py_object: py_perf::bindings::PyObject {
            ob_type: offset_of!(cpython::PyObject, ob_type) as i64,
        },
        py_string: py_perf::bindings::PyString {
            // see https://github.com/python/cpython/blob/3.11/Include/cpython/unicodeobject.h#L69-L71
            // NOTE(review): the type argument was missing (`size_of::()` does not compile).
            // `PyASCIIObject` is assumed from the header referenced above — confirm against
            // upstream.
            data: size_of::<cpython::PyASCIIObject>() as i64,
            size: -1,
        },
        py_type_object: py_perf::bindings::PyTypeObject {
            tp_name: offset_of!(cpython::PyTypeObject, tp_name) as i64,
        },
        py_thread_state: py_perf::bindings::PyThreadState {
            interp: offset_of!(cpython::PyThreadState, interp) as i64,
            next: offset_of!(cpython::PyThreadState, next) as i64,
            frame: -1,
            thread_id: offset_of!(cpython::PyThreadState, thread_id) as i64,
            native_thread_id: offset_of!(cpython::PyThreadState, native_thread_id) as i64,
            // pointer to intermediate structure, PyCFrame.
            cframe: offset_of!(cpython::PyThreadState, cframe) as i64,
        },
        py_cframe: py_perf::bindings::PyCFrame {
            current_frame: offset_of!(cpython::_PyCFrame, current_frame) as i64,
        },
        py_interpreter_state: py_perf::bindings::PyInterpreterState {
            // Offset of the nested `threads` struct plus the offset of `head` within it.
            tstate_head: offset_of!(cpython::PyInterpreterState, threads) as i64
                + offset_of!(cpython::_is_pythreads, head) as i64,
        },
        py_runtime_state: py_perf::bindings::PyRuntimeState {
            // Offset of the nested `interpreters` struct plus the offset of `main` within it.
            interp_main: offset_of!(cpython::pyruntimestate, interpreters) as i64
                + offset_of!(cpython::pyruntimestate_pyinterpreters, main) as i64,
        },
        py_frame_object: py_perf::bindings::PyFrameObject {
            f_back: offset_of!(cpython::_PyInterpreterFrame, previous) as i64,
            f_code: offset_of!(cpython::_PyInterpreterFrame, f_code) as i64,
            f_lineno: -1,
            f_localsplus: offset_of!(cpython::_PyInterpreterFrame, localsplus) as i64,
        },
        py_code_object: py_perf::bindings::PyCodeObject {
            co_filename: offset_of!(cpython::PyCodeObject, co_filename) as i64,
            co_name: offset_of!(cpython::PyCodeObject, co_name) as i64,
            co_varnames: offset_of!(cpython::PyCodeObject, co_localsplusnames) as i64,
            co_firstlineno: offset_of!(cpython::PyCodeObject, co_firstlineno) as i64,
        },
        py_tuple_object: py_perf::bindings::PyTupleObject {
            ob_item: offset_of!(cpython::PyTupleObject, ob_item) as i64,
        },
    };

    write_to_file("python_3_11_0.yaml", python_3_11_0_offsets)
}

/// Entry point: runs the offset dump for each CPython version handled in this
/// file, in ascending version order.
fn main() {
    dump_python_structs_2_7_15();

    dump_python_structs_3_3_7();
    dump_python_structs_3_5_5();

    dump_python_structs_3_6_6();
    dump_python_structs_3_7_0();
    dump_python_structs_3_8_0();
    dump_python_structs_3_9_5();
    dump_python_structs_3_10_0();
    dump_python_structs_3_11_0();
}