├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── images ├── insn_trace.png └── screenshot.png ├── include ├── perf_dlfilter.h └── wrapper.h └── src └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "ahash" 7 | version = "0.7.8" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" 10 | dependencies = [ 11 | "getrandom", 12 | "once_cell", 13 | "version_check", 14 | ] 15 | 16 | [[package]] 17 | name = "aho-corasick" 18 | version = "1.1.3" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 21 | dependencies = [ 22 | "memchr", 23 | ] 24 | 25 | [[package]] 26 | name = "autocfg" 27 | version = "1.4.0" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 30 | 31 | [[package]] 32 | name = "bindgen" 33 | version = "0.71.1" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" 36 | dependencies = [ 37 | "bitflags", 38 | "cexpr", 39 | "clang-sys", 40 | "itertools", 41 | "log", 42 | "prettyplease", 43 | "proc-macro2", 44 | "quote", 45 | "regex", 46 | "rustc-hash", 47 | "shlex", 48 | "syn 2.0.98", 49 | ] 50 | 51 | [[package]] 52 | name = "bitflags" 53 | version = "2.8.0" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = 
"8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" 56 | 57 | [[package]] 58 | name = "cexpr" 59 | version = "0.6.0" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" 62 | dependencies = [ 63 | "nom", 64 | ] 65 | 66 | [[package]] 67 | name = "cfg-if" 68 | version = "1.0.0" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 71 | 72 | [[package]] 73 | name = "clang-sys" 74 | version = "1.8.1" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" 77 | dependencies = [ 78 | "glob", 79 | "libc", 80 | "libloading", 81 | ] 82 | 83 | [[package]] 84 | name = "either" 85 | version = "1.13.0" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 88 | 89 | [[package]] 90 | name = "getrandom" 91 | version = "0.2.15" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 94 | dependencies = [ 95 | "cfg-if", 96 | "libc", 97 | "wasi", 98 | ] 99 | 100 | [[package]] 101 | name = "glob" 102 | version = "0.3.2" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" 105 | 106 | [[package]] 107 | name = "hashbrown" 108 | version = "0.12.3" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 111 | dependencies = [ 112 | "ahash", 113 | ] 114 | 115 | [[package]] 116 | name = "heck" 117 | version = "0.4.1" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | 
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 120 | 121 | [[package]] 122 | name = "itertools" 123 | version = "0.13.0" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 126 | dependencies = [ 127 | "either", 128 | ] 129 | 130 | [[package]] 131 | name = "libc" 132 | version = "0.2.169" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" 135 | 136 | [[package]] 137 | name = "libloading" 138 | version = "0.8.6" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" 141 | dependencies = [ 142 | "cfg-if", 143 | "windows-targets", 144 | ] 145 | 146 | [[package]] 147 | name = "log" 148 | version = "0.4.25" 149 | source = "registry+https://github.com/rust-lang/crates.io-index" 150 | checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" 151 | 152 | [[package]] 153 | name = "lru" 154 | version = "0.7.8" 155 | source = "registry+https://github.com/rust-lang/crates.io-index" 156 | checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" 157 | dependencies = [ 158 | "hashbrown", 159 | ] 160 | 161 | [[package]] 162 | name = "memchr" 163 | version = "2.7.4" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 166 | 167 | [[package]] 168 | name = "minimal-lexical" 169 | version = "0.2.1" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 172 | 173 | [[package]] 174 | name = "nom" 175 | version = "7.1.3" 176 | source = "registry+https://github.com/rust-lang/crates.io-index" 177 | checksum = 
"d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 178 | dependencies = [ 179 | "memchr", 180 | "minimal-lexical", 181 | ] 182 | 183 | [[package]] 184 | name = "num-traits" 185 | version = "0.2.19" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 188 | dependencies = [ 189 | "autocfg", 190 | ] 191 | 192 | [[package]] 193 | name = "numtoa" 194 | version = "0.1.0" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" 197 | 198 | [[package]] 199 | name = "once_cell" 200 | version = "1.20.3" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" 203 | 204 | [[package]] 205 | name = "perf2perfetto" 206 | version = "0.1.0" 207 | dependencies = [ 208 | "bindgen", 209 | "libc", 210 | "lru", 211 | "num-traits", 212 | "numtoa", 213 | "strum", 214 | "strum_macros", 215 | ] 216 | 217 | [[package]] 218 | name = "prettyplease" 219 | version = "0.2.29" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" 222 | dependencies = [ 223 | "proc-macro2", 224 | "syn 2.0.98", 225 | ] 226 | 227 | [[package]] 228 | name = "proc-macro2" 229 | version = "1.0.93" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" 232 | dependencies = [ 233 | "unicode-ident", 234 | ] 235 | 236 | [[package]] 237 | name = "quote" 238 | version = "1.0.38" 239 | source = "registry+https://github.com/rust-lang/crates.io-index" 240 | checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" 241 | dependencies = [ 242 | "proc-macro2", 243 | ] 244 | 245 | [[package]] 246 | name = 
"regex" 247 | version = "1.11.1" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 250 | dependencies = [ 251 | "aho-corasick", 252 | "memchr", 253 | "regex-automata", 254 | "regex-syntax", 255 | ] 256 | 257 | [[package]] 258 | name = "regex-automata" 259 | version = "0.4.9" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 262 | dependencies = [ 263 | "aho-corasick", 264 | "memchr", 265 | "regex-syntax", 266 | ] 267 | 268 | [[package]] 269 | name = "regex-syntax" 270 | version = "0.8.5" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 273 | 274 | [[package]] 275 | name = "rustc-hash" 276 | version = "2.1.1" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 279 | 280 | [[package]] 281 | name = "rustversion" 282 | version = "1.0.19" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" 285 | 286 | [[package]] 287 | name = "shlex" 288 | version = "1.3.0" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 291 | 292 | [[package]] 293 | name = "strum" 294 | version = "0.24.1" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" 297 | 298 | [[package]] 299 | name = "strum_macros" 300 | version = "0.24.3" 301 | source = "registry+https://github.com/rust-lang/crates.io-index" 302 | checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" 
303 | dependencies = [ 304 | "heck", 305 | "proc-macro2", 306 | "quote", 307 | "rustversion", 308 | "syn 1.0.109", 309 | ] 310 | 311 | [[package]] 312 | name = "syn" 313 | version = "1.0.109" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 316 | dependencies = [ 317 | "proc-macro2", 318 | "quote", 319 | "unicode-ident", 320 | ] 321 | 322 | [[package]] 323 | name = "syn" 324 | version = "2.0.98" 325 | source = "registry+https://github.com/rust-lang/crates.io-index" 326 | checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" 327 | dependencies = [ 328 | "proc-macro2", 329 | "quote", 330 | "unicode-ident", 331 | ] 332 | 333 | [[package]] 334 | name = "unicode-ident" 335 | version = "1.0.16" 336 | source = "registry+https://github.com/rust-lang/crates.io-index" 337 | checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" 338 | 339 | [[package]] 340 | name = "version_check" 341 | version = "0.9.5" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 344 | 345 | [[package]] 346 | name = "wasi" 347 | version = "0.11.0+wasi-snapshot-preview1" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 350 | 351 | [[package]] 352 | name = "windows-targets" 353 | version = "0.52.6" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 356 | dependencies = [ 357 | "windows_aarch64_gnullvm", 358 | "windows_aarch64_msvc", 359 | "windows_i686_gnu", 360 | "windows_i686_gnullvm", 361 | "windows_i686_msvc", 362 | "windows_x86_64_gnu", 363 | "windows_x86_64_gnullvm", 364 | "windows_x86_64_msvc", 365 | ] 366 | 367 | [[package]] 368 | name = 
"windows_aarch64_gnullvm" 369 | version = "0.52.6" 370 | source = "registry+https://github.com/rust-lang/crates.io-index" 371 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 372 | 373 | [[package]] 374 | name = "windows_aarch64_msvc" 375 | version = "0.52.6" 376 | source = "registry+https://github.com/rust-lang/crates.io-index" 377 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 378 | 379 | [[package]] 380 | name = "windows_i686_gnu" 381 | version = "0.52.6" 382 | source = "registry+https://github.com/rust-lang/crates.io-index" 383 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 384 | 385 | [[package]] 386 | name = "windows_i686_gnullvm" 387 | version = "0.52.6" 388 | source = "registry+https://github.com/rust-lang/crates.io-index" 389 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 390 | 391 | [[package]] 392 | name = "windows_i686_msvc" 393 | version = "0.52.6" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 396 | 397 | [[package]] 398 | name = "windows_x86_64_gnu" 399 | version = "0.52.6" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 402 | 403 | [[package]] 404 | name = "windows_x86_64_gnullvm" 405 | version = "0.52.6" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 408 | 409 | [[package]] 410 | name = "windows_x86_64_msvc" 411 | version = "0.52.6" 412 | source = "registry+https://github.com/rust-lang/crates.io-index" 413 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 414 | -------------------------------------------------------------------------------- /Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "perf2perfetto" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | libc = "0.2.121" 8 | num-traits = "0.2.14" 9 | lru = "0.7.3" 10 | strum = "0.24.0" 11 | strum_macros = "0.24.0" 12 | numtoa = "0.1.0" 13 | 14 | [lib] 15 | crate-type = ["dylib"] 16 | 17 | [build-dependencies] 18 | bindgen = "0.71.1" 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | Recent Intel processors feature the "Intel Processor Trace" feature, which can be used to capture the full execution trace of a program. 4 | It is an amazing tool for debugging, optimization and learning how (natively compiled) programs work. 5 | 6 | Linux supports Intel PT in `perf`. 7 | This repository contains an exporter of Intel PT traces from `perf` to [Fuchsia trace format](https://fuchsia.dev/fuchsia-src/reference/tracing/trace-format) for convenient viewing in [Perfetto](https://perfetto.dev/). 
8 | 9 | ## Example output (viewed in Perfetto) 10 | 11 | ![Screenshot of an example output viewed](images/screenshot.png) 12 | 13 | ## How to use 14 | 15 | ### Basic usage 16 | 17 | First collect the trace with `perf record`. For example, to trace a running process `my_prog`, you can do: 18 | 19 | ```bash 20 | TRACE_DURATION_SECS=1 21 | PID=$(pgrep --newest --exact my_prog) 22 | sudo perf record -o perf.data -p $PID -e intel_pt/cyc=1/ -- sleep $TRACE_DURATION_SECS 23 | ``` 24 | 25 | (This records both the kernel space and the user space. `perf record` can be configured to only record one 26 | of those, but this tool doesn't support that right now. It shouldn't be hard to add support for that, 27 | but I didn't get around to it.) 28 | 29 | The above command will generate a perf.data file. 30 | Note that tracing all branches with Intel PT like that can output several hundred MiB per second per core. 31 | Be careful with traces longer than a second so that perf.data doesn't clog up your hard drive. 32 | 33 | `perf record` offers more options than just "trace everything". For example, you can select "trace start" 34 | and "trace stop" addresses, use snapshot mode to save trace snapshots only on signals and on exit, and more. 35 | Refer to `man perf record` for details. 36 | 37 | To decode the trace to `.ftf`, build the dlfilter library with 38 | 39 | ```bash 40 | cargo build --release 41 | DLFILTER_PATH=$(realpath target/release/libperf2perfetto.so) 42 | 43 | ``` 44 | 45 | And run `perf script` with the library passed as a `--dlfilter`. 
For example with: 46 | 47 | ```bash 48 | #!/bin/bash 49 | # decode_pt.sh 50 | 51 | RELSTART=$1 52 | DURATION=$2 53 | DLFILTER_PATH="$3" 54 | OUT_FILENAME="$4" 55 | MODE="$5" 56 | 57 | ABSSTART=$(perf script -f --itrace=i0ns -Ftime -i perf.data | head -n1 | tr -d ':[:space:]') 58 | START=$(echo "$ABSSTART + $RELSTART" | bc) 59 | END=$(echo "$START + $DURATION" | bc) 60 | 61 | perf script -f --itrace=bei0ns -i perf.data --dlfilter "$DLFILTER_PATH" --time $START,$END --dlarg "$OUT_FILENAME" --dlarg $MODE 62 | ``` 63 | 64 | Note the `bei0ns`. This causes `perf` to emit "branches" (b), "errors" (e) and "instruction" (i, period 0ns) events. 65 | "branches" have to be emitted for this tool to work. "errors" are optional. They will be printed to stderr for your information only and don't affect the tool. 66 | "instructions" are optional. If they are emitted, they are used to calculate more exact instruction counts, though it slows decoding down quite significantly. 67 | I recommend emitting them. 68 | 69 | Usage: 70 | ```bash 71 | ./decode_pt.sh 0.01 0.03 "$DLFILTER_PATH" out_cyc_0.01-0.04.ftf c 72 | ``` 73 | 74 | This will decode the 10ms-40ms span of the trace (relative to the beginning) from `perf.data` to `out_cyc_0.01-0.04.ftf`. 75 | The `c` parameter chooses CPU cycles as the time axis in the output. Other options are `t` (timestamp) and `i` (instructions). 76 | 77 | ### Instruction and cycle counters 78 | 79 | The cycle counts emitted in the output are not exact. The cycle information comes from IntelPT packets, so it has the same granularity 80 | as packets. In other words: during decoding, cycle count is only updated on indirect jumps and some conditional jumps (and periodically every 81 | few thousand cycles). Cycle count for an instruction is calculated by subtracting the cycle count seen at `call` from the count seen on `ret`. 82 | If there was no update between them, the count will be 0. 
If there were no updates for a long time before `call`, the count will include many cycles 83 | that passed before call. So the count can be wrong in both ways. 84 | 85 | You can improve that with `noretcomp` (see below), which will force an update on `ret`. (But trying to achieve granularity finer than a few hundred cycles is 86 | unlikely to get you anywhere due to how out-of-order CPUs work.) 87 | 88 | The instruction counts are exact when using `i0ns` (see above). Without it, instruction count is only updated when the cycle count is updated. 89 | 90 | ### noretcomp 91 | 92 | By default `perf record` enables "return compression", which disables the generation of Intel PT packets on `ret` instructions. 93 | Even though the target of `ret` can't be deduced offline in general, well-behaved applications don't modify return addresses, 94 | and the return target can be deduced from preceding calls. This fact can be used to decrease the number (and thus overhead) of 95 | Intel PT packets without losing correctness. 96 | 97 | Even if your application is well-behaved, you can consider disabling return compression with `noretcomp=1`, as in `perf record -e intel_pt/cyc=1,noretcomp=1/ ...`. 98 | This will result in more exact instruction and cycle counts, though it will also increase the overhead of tracing (think: from 3% to 5%). 99 | 100 | ### Output file size 101 | 102 | To view the trace, head to https://ui.perfetto.dev/ and open the `.ftf` file. 103 | 104 | Avoid very big traces to avoid severe lags in Perfetto. Keep them smaller than several hundred megabytes and several hundred thousand function calls. When tracing a single thread of Scylla, this translates to about 25ms. 105 | 106 | You can split longer spans into manageable chunks like so: 107 | ```bash 108 | parallel -j42 ./decode_pt.sh {} 0.025 ./libperf2perfetto.so out_cyc_{}.ftf c ::: $(seq 0.100 0.025 0.299) 109 | ``` 110 | 111 | This will decode the span 100ms-300ms split into 8 files, each covering 25ms. 
112 | 113 | ### Drilling down 114 | 115 | If you find something interesting in the trace, you can view a trace of individual instructions like so: 116 | ```bash 117 | perf script -i perf.data --itrace=i0ns -Fip,time,insn,srcline,sym --xed --tid 23612 --time 13161.554437440,13161.554462914 118 | 119 | ``` 120 | where `--tid` and `--time` refer to the interesting part of the trace. 121 | You can find a copypaste-ready time span in "slice details" in perfetto. 122 | (This does not necessarily match the time axis in perfetto. What you see there can be something other than time (instructions or cycles), depending on the second `--dlarg`.) 123 | This will output a list of all executed instructions in that span, with source code locations, for detailed inspection: 124 | 125 | ![Screenshot of an example output viewed](images/insn_trace.png) 126 | 127 | ### Archiving traces and decoding traces from remote machines 128 | 129 | perf.data isn't a standalone file. 130 | Raw Intel PT data contains only information that can't be deduced offline: 131 | the results of conditional branches (taken/not taken), target addresses of indirect jumps, timing information. 132 | Decoding the raw trace to something useful, (a call-ret trace, an instruction trace, etc.), 133 | requires the access to all binaries executed when the program was traced. 134 | 135 | `perf.data` doesn't embed those binaries, but it contains build-ids of required binaries. 136 | When decoding, `perf` looks for given buildids in system directories (your package manager installs debug info there) 137 | and in "buildid cache", (usually located at `~/.debug`). 138 | If you update the machine or move `perf.data` to other machines, the necessary buildids will likely not be present on the system anymore. 
139 | 140 | If you want the trace to be portable across updates, reboots and machines, you should archive the binaries and store them with the trace, 141 | so that you can repopulate the buildid cache before decoding when necessary. 142 | 143 | Fortunately `perf` has a script that packs all the needed binaries into an archive so you can do that easily. 144 | 145 | On the remote do: 146 | 147 | ```bash 148 | # First, record the trace. 149 | TRACE_DURATION_SECS=1 150 | PID=$(pgrep --newest --exact my_prog) 151 | sudo perf record -o perf.data --kcore -p $PID -e intel_pt/cyc=1/ -- sleep $TRACE_DURATION_SECS 152 | # Note the added `--kcore`. Instead of a `perf.data` file, this will output a `perf.data/` directory containing a 153 | # `data` file and `kcore_dir/` directory with a copy of the kernel image. 154 | # `perf script` understands this directory scheme. Don't pass `-i perf.data/data` to it, just `-i perf.data`. 155 | # 156 | # (The kernel image is passed separately from the buildid cache mechanism. 157 | # Even if it's in the cache, you still need to manually tell `perf script` to use it using `--kallsyms`, 158 | # or use the `kcore_dir/` directory scheme. I don't know why it doesn't just want to behave like any other 159 | # binaries.) 160 | 161 | # And collect all relevant binaries into an archive. 162 | sudo perf archive 163 | # This will create `perf.data.tar.bz2` 164 | ``` 165 | 166 | Then on your workstation: 167 | ```bash 168 | # Download the trace and the archive. 169 | rsync -rz --progress --rsync-path="sudo rsync" remote:perf.data . 170 | rsync -r --progress remote:perf.data.tar.bz2 . 171 | 172 | # Unpack the binaries into a place searched by perf when decoding. `~/.debug` is the default. 173 | lbzip2 -dc perf.data.tar.bz2 | tar x -C ~/.debug 174 | ``` 175 | 176 | Now you can `perf script -i perf.data` as usual. 177 | 178 | Note that every user has their own buildid-cache. 
If you are going to `sudo perf script`, you have to unpack the archive 179 | to `/root/.debug`, not `~/.debug`. 180 | 181 | If your `perf` distribution doesn't have `perf archive`, just grab `tools/perf/perf-archive.sh` from the Linux repository. 182 | 183 | ## Troubleshooting 184 | 185 | I have encountered some programs (e.g. Firefox on Fedora) that I can't trace because the decoding fails with SIGSEGV. 186 | This is a problem with `perf`, not this dlfilter. It happens when `perf` tries to read something (symbol names or instructions, 187 | I'm not sure) from library segments with `PROT_NONE`. I'm not sure what causes this. 188 | 189 | ## Related projects 190 | 191 | ### magic-trace 192 | 193 | [magic-trace](https://github.com/janestreet/magic-trace) provides the same general functionality (export from perf to .ftf). AFAIK the main differences are: 194 | - magic-trace exposes its own CLI and does the necessary `perf` invocations under the hood 195 | - magic-trace has some features for interactive choice of the recording target (PID, symbol to collect snapshots on, etc.) using [fzf](https://github.com/junegunn/fzf) 196 | - magic-trace is written in OCaml 197 | - magic-trace parses the text output of `perf script` instead of using its binary API (the `dlfilter` feature) 198 | - this project shows some additional info in the trace: instructions, cycles, instruction cache footprint 199 | - the call-stack simulation logic may differ (in particular the handling of gaps in the trace) 200 | 201 | I wrote my own converter instead of using magic-trace because I wanted instruction and cycle counts, a separation of recording and decoding (for example, to visualize traces collected on remote machines), and it failed with some regex errors the first time I tried it (on a C++ project with elaborate template symbols). 
202 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | extern crate bindgen; 2 | 3 | use std::env; 4 | use std::path::PathBuf; 5 | 6 | fn main() { 7 | // Tell cargo to invalidate the built crate whenever the wrapper changes 8 | println!("cargo:rerun-if-changed=include/wrapper.h"); 9 | 10 | // The bindgen::Builder is the main entry point 11 | // to bindgen, and lets you build up options for 12 | // the resulting bindings. 13 | let bindings = bindgen::Builder::default() 14 | // The input header we would like to generate 15 | // bindings for. 16 | .header("include/wrapper.h") 17 | // Tell cargo to invalidate the built crate whenever any of the 18 | // included header files changed. 19 | .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) 20 | // Finish the builder and generate the bindings. 21 | .generate() 22 | // Unwrap the Result and panic on failure. 23 | .expect("Unable to generate bindings"); 24 | 25 | // Write the bindings to the $OUT_DIR/bindings.rs file. 
26 | let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); 27 | bindings 28 | .write_to_file(out_path.join("bindings.rs")) 29 | .expect("Couldn't write bindings!"); 30 | } 31 | -------------------------------------------------------------------------------- /images/insn_trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michoecho/perf2perfetto/b1e82573b1daa9e46e288334566459f42110e108/images/insn_trace.png -------------------------------------------------------------------------------- /images/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michoecho/perf2perfetto/b1e82573b1daa9e46e288334566459f42110e108/images/screenshot.png -------------------------------------------------------------------------------- /include/perf_dlfilter.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * perf_dlfilter.h: API for perf --dlfilter shared object 4 | * Copyright (c) 2021, Intel Corporation. 
5 | */ 6 | #ifndef _LINUX_PERF_DLFILTER_H 7 | #define _LINUX_PERF_DLFILTER_H 8 | 9 | #include 10 | #include 11 | 12 | /* Definitions for perf_dlfilter_sample flags */ 13 | enum { 14 | PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0, 15 | PERF_DLFILTER_FLAG_CALL = 1ULL << 1, 16 | PERF_DLFILTER_FLAG_RETURN = 1ULL << 2, 17 | PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3, 18 | PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4, 19 | PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5, 20 | PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6, 21 | PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7, 22 | PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8, 23 | PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9, 24 | PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10, 25 | PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11, 26 | PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12, 27 | }; 28 | 29 | /* 30 | * perf sample event information (as per perf script and ) 31 | */ 32 | struct perf_dlfilter_sample { 33 | __u32 size; /* Size of this structure (for compatibility checking) */ 34 | __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in */ 35 | __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in */ 36 | __u64 ip; 37 | __s32 pid; 38 | __s32 tid; 39 | __u64 time; 40 | __u64 addr; 41 | __u64 id; 42 | __u64 stream_id; 43 | __u64 period; 44 | __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in */ 45 | __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in */ 46 | __u64 insn_cnt; /* For instructions-per-cycle (IPC) */ 47 | __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */ 48 | __s32 cpu; 49 | __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */ 50 | __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in */ 51 | __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in */ 52 | __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in */ 53 | __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in */ 54 | __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in */ 55 | __u8 cpumode; /* Refer CPUMODE_MASK etc in */ 56 | __u8 addr_correlates_sym; /* True => resolve_addr() can be called */ 57 | __u16 misc; /* 
Refer perf_event_header in */ 58 | __u32 raw_size; /* Refer PERF_SAMPLE_RAW in */ 59 | const void *raw_data; /* Refer PERF_SAMPLE_RAW in */ 60 | __u64 brstack_nr; /* Number of brstack entries */ 61 | const struct perf_branch_entry *brstack; /* Refer */ 62 | __u64 raw_callchain_nr; /* Number of raw_callchain entries */ 63 | const __u64 *raw_callchain; /* Refer */ 64 | const char *event; 65 | }; 66 | 67 | /* 68 | * Address location (as per perf script) 69 | */ 70 | struct perf_dlfilter_al { 71 | __u32 size; /* Size of this structure (for compatibility checking) */ 72 | __u32 symoff; 73 | const char *sym; 74 | __u64 addr; /* Mapped address (from dso) */ 75 | __u64 sym_start; 76 | __u64 sym_end; 77 | const char *dso; 78 | __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer */ 79 | __u8 is_64_bit; /* Only valid if dso is not NULL */ 80 | __u8 is_kernel_ip; /* True if in kernel space */ 81 | __u32 buildid_size; 82 | __u8 *buildid; 83 | /* Below members are only populated by resolve_ip() */ 84 | __u8 filtered; /* True if this sample event will be filtered out */ 85 | const char *comm; 86 | }; 87 | 88 | struct perf_dlfilter_fns { 89 | /* Return information about ip */ 90 | const struct perf_dlfilter_al *(*resolve_ip)(void *ctx); 91 | /* Return information about addr (if addr_correlates_sym) */ 92 | const struct perf_dlfilter_al *(*resolve_addr)(void *ctx); 93 | /* Return arguments from --dlarg option */ 94 | char **(*args)(void *ctx, int *dlargc); 95 | /* 96 | * Return information about address (al->size must be set before 97 | * calling). Returns 0 on success, -1 otherwise. 
98 | */ 99 | __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al); 100 | /* Return instruction bytes and length */ 101 | const __u8 *(*insn)(void *ctx, __u32 *length); 102 | /* Return source file name and line number */ 103 | const char *(*srcline)(void *ctx, __u32 *line_number); 104 | /* Return perf_event_attr, refer */ 105 | struct perf_event_attr *(*attr)(void *ctx); 106 | /* Read object code, return numbers of bytes read */ 107 | __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); 108 | /* Reserved */ 109 | void *(*reserved[120])(void *); 110 | }; 111 | 112 | /* 113 | * If implemented, 'start' will be called at the beginning, 114 | * before any calls to 'filter_event'. Return 0 to indicate success, 115 | * or return a negative error code. '*data' can be assigned for use 116 | * by other functions. 'ctx' is needed for calls to perf_dlfilter_fns, 117 | * but most perf_dlfilter_fns are not valid when called from 'start'. 118 | */ 119 | int start(void **data, void *ctx); 120 | 121 | /* 122 | * If implemented, 'stop' will be called at the end, 123 | * after any calls to 'filter_event'. Return 0 to indicate success, or 124 | * return a negative error code. 'data' is set by start(). 'ctx' is 125 | * needed for calls to perf_dlfilter_fns, but most perf_dlfilter_fns 126 | * are not valid when called from 'stop'. 127 | */ 128 | int stop(void *data, void *ctx); 129 | 130 | /* 131 | * If implemented, 'filter_event' will be called for each sample 132 | * event. Return 0 to keep the sample event, 1 to filter it out, or 133 | * return a negative error code. 'data' is set by start(). 'ctx' is 134 | * needed for calls to perf_dlfilter_fns. 135 | */ 136 | int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx); 137 | 138 | /* 139 | * The same as 'filter_event' except it is called before internal 140 | * filtering. 
/// Merges two hash sets into one, consuming both.
///
/// The smaller set is always drained into the larger one, so at most
/// `min(a.len(), b.len())` elements are re-inserted.
fn merge_hashsets<T>(mut a: HashSet<T>, mut b: HashSet<T>) -> HashSet<T>
where
    T: Eq + std::hash::Hash,
{
    if a.len() < b.len() {
        // Make `a` the bigger set so that we extend it with the smaller one.
        std::mem::swap(&mut a, &mut b);
    }
    a.extend(b);
    a
}
ThreadState { 54 | insn_cnt: u64, 55 | cyc_cnt: u64, 56 | ip: u64, 57 | stack: Vec, 58 | last_seen_time: u64, 59 | pid_tid: (u64, u64), 60 | } 61 | 62 | #[derive(Clone, Copy)] 63 | enum TimestampMode { 64 | Time, 65 | Cycles, 66 | Instructions, 67 | } 68 | 69 | impl TimestampMode { 70 | fn choose(&self, time: u64, cyc: u64, insns: u64) -> u64 { 71 | match &self { 72 | TimestampMode::Time => time, 73 | TimestampMode::Cycles => cyc, 74 | TimestampMode::Instructions => insns, 75 | } 76 | } 77 | } 78 | 79 | struct State { 80 | w: BufWriter, 81 | c: ftf::Caches, 82 | has_insns_events: bool, 83 | threads: std::collections::HashMap, 84 | mode: TimestampMode, 85 | } 86 | 87 | unsafe fn resolve_addr<'a>( 88 | sample: &perf::perf_dlfilter_sample, 89 | ctx: *mut c_void, 90 | buf: &'a mut [u8], 91 | ) -> &'a str { 92 | if sample.addr_correlates_sym != 0 { 93 | let raw_symbol = perf_dlfilter_fns.assume_init().resolve_addr.unwrap()(ctx); 94 | if !(*raw_symbol).sym.is_null() { 95 | let symbol = std::ffi::CStr::from_ptr((*raw_symbol).sym); 96 | symbol.to_str().unwrap_or("Non UTF-8 symbol") 97 | } else { 98 | sample.addr.numtoa(16, buf); 99 | &std::str::from_utf8_unchecked(&buf[4..]) 100 | } 101 | } else { 102 | sample.addr.numtoa(16, buf); 103 | &std::str::from_utf8_unchecked(&buf[4..]) 104 | } 105 | } 106 | 107 | unsafe fn resolve_ip<'a>( 108 | sample: &perf::perf_dlfilter_sample, 109 | ctx: *mut c_void, 110 | buf: &'a mut [u8], 111 | ) -> &'a str { 112 | let raw_symbol = perf_dlfilter_fns.assume_init().resolve_ip.unwrap()(ctx); 113 | if !(*raw_symbol).sym.is_null() { 114 | let symbol = std::ffi::CStr::from_ptr((*raw_symbol).sym); 115 | symbol.to_str().unwrap_or("Non UTF-8 symbol") 116 | } else { 117 | sample.ip.numtoa(16, buf); 118 | &std::str::from_utf8_unchecked(&buf[4..]) 119 | } 120 | } 121 | const CACHE_LINE_SIZE: u64 = 64; 122 | 123 | unsafe fn pop_frame(w: &mut dyn Write, c: &mut Caches, mode: TimestampMode, tstate: &mut ThreadState, _ctx: *mut c_void) { 124 | let frame = 
tstate.stack.last().unwrap(); 125 | ftf::write_frame_end( 126 | w, 127 | c, 128 | mode.choose(tstate.last_seen_time, tstate.cyc_cnt, tstate.insn_cnt), 129 | tstate.pid_tid, 130 | tstate.insn_cnt - frame.start_insn_cnt, 131 | tstate.cyc_cnt - frame.start_cyc_cnt, 132 | frame.footprint.len() as u64 * CACHE_LINE_SIZE, 133 | frame.start_timestamp, 134 | tstate.last_seen_time, 135 | ) 136 | .unwrap(); 137 | let top_footprint = tstate.stack.pop().unwrap().footprint; 138 | let merged = merge_hashsets( 139 | std::mem::take(&mut tstate.stack.last_mut().unwrap().footprint), 140 | top_footprint, 141 | ); 142 | tstate.stack.last_mut().unwrap().footprint = merged; 143 | } 144 | 145 | unsafe fn pop_unknown_frame(w: &mut dyn Write, c: &mut Caches, mode: TimestampMode, tstate: &mut ThreadState, sample: &perf::perf_dlfilter_sample, ctx: *mut c_void) { 146 | let frame = tstate.stack.last().unwrap(); 147 | let mut buffer = ['0' as u8; 20]; 148 | let sym = resolve_ip(sample, ctx, &mut buffer[..]); 149 | let ts = mode.choose(tstate.last_seen_time, tstate.cyc_cnt, tstate.insn_cnt); 150 | ftf::write_frame_full( 151 | w, 152 | c, 153 | ts, 154 | tstate.pid_tid, 155 | tstate.insn_cnt - frame.start_insn_cnt, 156 | tstate.cyc_cnt - frame.start_cyc_cnt, 157 | frame.footprint.len() as u64 * CACHE_LINE_SIZE, 158 | sym, 159 | ts, 160 | frame.start_timestamp, 161 | sample.time, 162 | ) 163 | .unwrap(); 164 | } 165 | 166 | unsafe fn push_frame(w: &mut dyn Write, c: &mut Caches, mode: TimestampMode, tstate: &mut ThreadState, sample: &perf::perf_dlfilter_sample, ctx: *mut c_void) { 167 | let mut buffer = ['0' as u8; 20]; 168 | 169 | let sym = if tstate.stack.len() > 1 { 170 | resolve_addr(sample, ctx, &mut buffer[..]) 171 | } else { 172 | "TRACE" 173 | }; 174 | 175 | ftf::write_frame_start( 176 | w, 177 | c, 178 | mode.choose(tstate.last_seen_time, tstate.cyc_cnt, tstate.insn_cnt), 179 | (sample.pid as u64, sample.tid as u64), 180 | sym, 181 | ) 182 | .unwrap(); 183 | 184 | 
tstate.stack.push(FrameData { 185 | start_cyc_cnt: tstate.cyc_cnt, 186 | start_insn_cnt: tstate.insn_cnt, 187 | start_timestamp: sample.time, 188 | footprint: HashSet::new(), 189 | }); 190 | } 191 | 192 | #[no_mangle] 193 | pub unsafe extern "C" fn filter_event_early( 194 | raw_state: *mut c_void, 195 | sample: &perf::perf_dlfilter_sample, 196 | ctx: *mut c_void, 197 | ) -> c_int { 198 | // The semantics of the `stack` in ThreadState are as follows: 199 | // Frame 0 contains counters for the entire trace. 200 | // Frame 1 contains counters for the current contiguous trace segment. It is closed 201 | // and reopened on `tr end` and errors. 202 | // Frames 2.. contain the call stack as it is known. They are opened on calls (or interrupts) 203 | // and closed (merged into the parent frame) on returns. 204 | // 205 | // Since the current trace segment could have started in the middle of a real 206 | // stack frame, ancestors of the starting frame are not here. 207 | // They will be only noticed when they return and printed with counters taken from frame 1. 208 | 209 | let state: &mut State = &mut *raw_state.cast::(); 210 | let tstate: &mut ThreadState = 211 | &mut state 212 | .threads 213 | .entry(sample.tid as u64) 214 | .or_insert_with(|| ThreadState { 215 | cyc_cnt: 0, 216 | insn_cnt: 0, 217 | ip: 0, 218 | stack: vec![FrameData::default(), FrameData::default()], 219 | last_seen_time: 0, 220 | pid_tid: (sample.pid as u64, sample.tid as u64), 221 | }); 222 | 223 | tstate.last_seen_time = sample.time; 224 | 225 | if (*sample.event) == 'b' as i8 { 226 | // 'branches' event 227 | if !state.has_insns_events { 228 | // If the user has piped instruction events to the filter, 229 | // then we do an exact count of instructions. 230 | // Otherwise we use the approximate (updated on CYC packets) count 231 | // provided by perf. 232 | tstate.insn_cnt += sample.insn_cnt; 233 | } 234 | tstate.cyc_cnt += sample.cyc_cnt; 235 | // Not all decoded errors cause a 'tr end'. 
Some are recovered from, 236 | // and the output continues. 237 | // Unfortunately perf doesn't notify the filter about that, 238 | // so we need to cope with gaps in the input that appear sometimes. 239 | // 240 | // Here we try to guess when a gap occured. If `ip` became smaller 241 | // since the last sample or it grew by more than BAD_JUMP_HEURISITC, 242 | // we guess that an error has occured. 243 | const BAD_JUMP_HEURISTIC: u64 = 0x1000; 244 | if sample.ip.wrapping_sub(tstate.ip) > BAD_JUMP_HEURISTIC { 245 | tstate.ip = 0; 246 | } 247 | 248 | if tstate.ip != 0 && sample.ip != 0 { 249 | // No errors. The normal path. We update the cache footprint info. 250 | const CACHE_LINE_MASK: u64 = !(CACHE_LINE_SIZE - 1); 251 | let cache_line_start = tstate.ip & CACHE_LINE_MASK; 252 | let cache_line_end = (sample.ip) & CACHE_LINE_MASK; 253 | let mut cache_line = cache_line_start; 254 | // Here we update the icache footprint set of the current frame. 255 | // The current implementation is dumb and just inserts all touched lines into 256 | // a set. 257 | while cache_line <= cache_line_end { 258 | tstate 259 | .stack 260 | .last_mut() 261 | .unwrap() 262 | .footprint 263 | .insert(cache_line); 264 | cache_line += CACHE_LINE_SIZE; 265 | } 266 | } else { 267 | // `ip` equal to 0 means that a contiguous trace segment has ended (`tr end`) 268 | // or an error has occured. 269 | // We close all open stack frames. 270 | // We also close the special frame 1, (the current contiguous trace segment) and reopen it. 
271 | while tstate.stack.len() > 1 { 272 | pop_frame(&mut state.w, &mut state.c, state.mode, tstate, ctx); 273 | } 274 | push_frame(&mut state.w, &mut state.c, state.mode, tstate, sample, ctx); 275 | } 276 | 277 | tstate.ip = sample.addr; 278 | 279 | if sample.flags & perf::PERF_DLFILTER_FLAG_CALL != 0 { 280 | push_frame(&mut state.w, &mut state.c, state.mode, tstate, sample, ctx); 281 | } else if sample.flags & perf::PERF_DLFILTER_FLAG_RETURN != 0 { 282 | if tstate.stack.len() > 2 { 283 | // This return matches a previously seen call. 284 | pop_frame(&mut state.w, &mut state.c, state.mode, tstate, ctx); 285 | } else { 286 | // This return does not match a previous call, so the current trace fragment 287 | // started inside the frame. 288 | // 289 | // The current implementation handles that by writing a single point in time (the end of the frame) 290 | // to output. 291 | // It would be better to show the full known time span it in the trace, but we only learn about it at the end, 292 | // and ftf requires spans to be nested properly. In other words, to represent those front-truncated frames properly 293 | // we would have to delay all output until the current trace fragment ends. 
pop_unknown_frame(&mut state.w, &mut state.c, state.mode, tstate, sample, ctx);
            }
        }
    } else {
        // 'instructions' event
        state.has_insns_events = true;
        tstate.insn_cnt += 1;
    }
    return 1;
}

/// perf dlfilter `start` hook: opens the output trace and allocates the filter state.
///
/// The filter arguments are fetched through the `args` callback of `perf_dlfilter_fns`:
///
/// * argument 0 (optional): output file name; `out.ftf` when absent.
/// * argument 1 (optional): timestamp mode, chosen by its first byte: `c` selects
///   cycles, `t` selects time, and `i`, any other byte, or no argument selects
///   instructions.
///
/// On success the trace header is written, a heap-allocated `State` is stored in
/// `*data`, and 0 is returned. If the file cannot be created or the header cannot
/// be written, the error is reported on stderr and -1 is returned with `*data`
/// left untouched.
///
/// # Safety
///
/// `perf_dlfilter_fns` must have been initialised and `ctx` must be the context
/// pointer expected by its callbacks.
#[no_mangle]
pub unsafe extern "C" fn start(data: &mut *mut c_void, ctx: *mut c_void) -> c_int {
    let mut argc: c_int = 0;
    let argv = (perf_dlfilter_fns.assume_init().args.unwrap())(ctx, &mut argc as *mut c_int);
    // `slice::from_raw_parts` requires a non-null pointer even for an empty slice,
    // so a null `argv` (which the callback may return when no arguments were
    // given) is mapped to an empty slice explicitly.
    let args: &[_] = if argv.is_null() || argc <= 0 {
        &[]
    } else {
        std::slice::from_raw_parts(argv, argc as usize)
    };

    let filename = match args.first() {
        Some(&name) => std::ffi::OsStr::from_bytes(std::ffi::CStr::from_ptr(name).to_bytes()),
        None => std::ffi::OsStr::new("out.ftf"),
    };
    let mode = match args.get(1) {
        // Only the first byte of the argument is inspected.
        Some(&arg) => match *arg as u8 {
            b'c' => TimestampMode::Cycles,
            b't' => TimestampMode::Time,
            _ => TimestampMode::Instructions,
        },
        None => TimestampMode::Instructions,
    };

    // Report failures through the return code: a panic here would unwind into C.
    let file = match File::create(filename) {
        Ok(file) => file,
        Err(err) => {
            eprintln!(
                "dlfilter: cannot create trace file {}: {}",
                filename.to_string_lossy(),
                err
            );
            return -1;
        }
    };
    let mut w = BufWriter::new(file);
    if let Err(err) = ftf::write_header(&mut w) {
        eprintln!(
            "dlfilter: cannot write trace header to {}: {}",
            filename.to_string_lossy(),
            err
        );
        return -1;
    }

    let state = State {
        threads: std::collections::HashMap::new(),
        w,
        c: ftf::Caches::default(),
        has_insns_events: false,
        mode,
    };
    // Ownership is handed to the caller as an opaque pointer; `stop` reclaims it.
    *data = Box::into_raw(Box::new(state)).cast::<c_void>();
    0
}

/// perf dlfilter `stop` hook: finishes the trace and releases the filter state.
///
/// For every thread, frames are popped until a single entry remains on its
/// stack; then the writer is flushed and the `State` allocated by `start` is
/// dropped. Returns 0 on success and -1 if the final flush fails (the state is
/// freed in both cases). A null `raw_state` is accepted and treated as
/// "nothing to do".
///
/// # Safety
///
/// `raw_state` must be null or the pointer stored in `*data` by `start`, and it
/// must not be used again after this call.
#[no_mangle]
pub unsafe extern "C" fn stop(raw_state: *mut c_void, ctx: *mut c_void) -> c_int {
    let state_ptr = raw_state.cast::<State>();
    if state_ptr.is_null() {
        return 0;
    }
    let state: &mut State = &mut *state_ptr;
    for tstate in state.threads.values_mut() {
        while tstate.stack.len() > 1 {
            pop_frame(&mut state.w, &mut state.c, state.mode, tstate, ctx);
        }
    }
    let flushed = state.w.flush();
    // Rebuild the box with its real type so that `State` is dropped and its
    // memory is released with the layout it was allocated with.
    drop(Box::from_raw(state_ptr));
    match flushed {
        Ok(()) => 0,
        Err(err) => {
            eprintln!("dlfilter: cannot flush trace file: {}", err);
            -1
        }
    }
}

/// Minimal writer for the Fuchsia trace format (FTF): header, string and thread
/// tables, and duration events carrying a fixed set of arguments.
mod ftf {
    use lru::LruCache;
    use std::cmp::min;
    use std::io::Write;
    use strum::{EnumCount, IntoEnumIterator};
    use strum_macros::{EnumCount, EnumIter};

    /// Reference to an entry of one of the trace's index tables.
    pub enum CacheRef {
        /// Table index to embed in a record header.
        Idx(u64),
    }

    /// LRU-managed string table: maps strings to the table slot that currently
    /// holds them in the trace.
    pub struct StringCache {
        lru: LruCache<String, u16>,
    }

    impl Default for StringCache {
        fn default() -> Self {
            // The table has 32 Ki indices; the lowest ones are reserved for the
            // internal strings.
            const STRING_TABLE_SIZE: usize = 32 * 1024 - StringCache::RESERVED as usize;
            Self {
                lru: LruCache::new(STRING_TABLE_SIZE),
            }
        }
    }

    /// Writes a string record that binds table index `idx` to `s`.
    ///
    /// Strings longer than 32000 bytes are truncated; the cut is moved back to a
    /// character boundary so the stored bytes stay valid UTF-8.
    fn write_string_record(w: &mut dyn Write, idx: u64, s: &str) -> std::io::Result<()> {
        const MAX_STRING_LEN: usize = 32000;
        let mut s_len = min(s.len(), MAX_STRING_LEN);
        // Index 0 is always a boundary, so this terminates.
        while !s.is_char_boundary(s_len) {
            s_len -= 1;
        }
        let rsize = 1 + words_for_bytes(s_len);
        let rtype: u64 = 2;
        write_u64(w, rtype | rsize << 4 | idx << 16 | (s_len as u64) << 32)?;
        write_string(w, &s.as_bytes()[..s_len])
    }

    impl StringCache {
        /// Number of low table indices reserved for [`InternalString`] values.
        const RESERVED: u64 = InternalString::COUNT as u64;

        /// Returns the table index for `s`, emitting a string record to `w` first
        /// if `s` is not currently in the table.
        ///
        /// The empty string always maps to index 0 and is never written. When the
        /// table is full, the least recently used slot is reused for `s`.
        pub fn get_ref(&mut self, w: &mut dyn Write, s: &str) -> std::io::Result<CacheRef> {
            if s.is_empty() {
                return Ok(CacheRef::Idx(0));
            }
            if let Some(&slot) = self.lru.get(s) {
                return Ok(CacheRef::Idx(u64::from(slot) + Self::RESERVED));
            }
            let slot = if self.lru.len() < self.lru.cap() {
                // Slots are handed out in order until the table is full.
                self.lru.len() as u16
            } else {
                self.lru
                    .pop_lru()
                    .expect("a full cache always has an entry to evict")
                    .1
            };
            self.lru.put(s.to_string(), slot);
            let out = u64::from(slot) + Self::RESERVED;
            write_string_record(w, out, s)?;
            Ok(CacheRef::Idx(out))
        }
    }

    /// LRU-managed thread table: maps `(pid, tid)` pairs to the table slot that
    /// currently holds them in the trace.
    pub struct ThreadCache {
        lru: LruCache<(u64, u64), u8>,
    }

    impl Default for ThreadCache {
        fn default() -> Self {
            const THREAD_TABLE_SIZE: usize = 256 - ThreadCache::RESERVED as usize;
            Self {
                lru: LruCache::new(THREAD_TABLE_SIZE),
            }
        }
    }

    /// Writes a thread record that binds table index `idx` to `pid_tid`.
    fn write_thread_record(
        w: &mut dyn Write,
        idx: u64,
        pid_tid: (u64, u64),
    ) -> std::io::Result<()> {
        let rsize: u64 = 3;
        let rtype: u64 = 3;
        write_u64(w, rtype | rsize << 4 | idx << 16)?;
        write_u64(w, pid_tid.0)?;
        write_u64(w, pid_tid.1)
    }

    impl ThreadCache {
        /// Number of low table indices that are never handed out.
        const RESERVED: u64 = 1;

        /// Returns the table index for `pid_tid`, emitting a thread record to `w`
        /// first if the pair is not currently in the table.
        ///
        /// When the table is full, the least recently used slot is reused.
        pub fn get_ref(
            &mut self,
            w: &mut dyn Write,
            pid_tid: (u64, u64),
        ) -> std::io::Result<CacheRef> {
            if let Some(&slot) = self.lru.get(&pid_tid) {
                return Ok(CacheRef::Idx(u64::from(slot) + Self::RESERVED));
            }
            let slot = if self.lru.len() < self.lru.cap() {
                self.lru.len() as u8
            } else {
                self.lru
                    .pop_lru()
                    .expect("a full cache always has an entry to evict")
                    .1
            };
            self.lru.put(pid_tid, slot);
            let out = u64::from(slot) + Self::RESERVED;
            write_thread_record(w, out, pid_tid)?;
            Ok(CacheRef::Idx(out))
        }
    }

    /// The string and thread tables shared by all event writers.
    #[derive(Default)]
    pub struct Caches {
        string_cache: StringCache,
        thread_cache: ThreadCache,
    }

    /// Everything needed to emit the two header words of an event record.
    struct EventHeader<'call> {
        name: &'call str,
        category: &'call str,
        pid_tid: (u64, u64),
        timestamp: u64,
        /// Number of argument records that follow the header.
        nargs: u8,
        /// Event type code stored in the header.
        etype: u8,
        /// Size, in 8-byte words, of everything that follows the two header words.
        extra_data_size: usize,
    }

    /// Writes one 64-bit word in native byte order.
    fn write_u64(w: &mut dyn Write, x: u64) -> std::io::Result<()> {
        w.write_all(&x.to_ne_bytes())
    }

    /// Writes `s` followed by zero bytes up to the next multiple of 8 bytes.
    fn write_string(w: &mut dyn Write, s: &[u8]) -> std::io::Result<()> {
        w.write_all(s)?;
        let tail = s.len() % 8;
        if tail != 0 {
            w.write_all(&[0u8; 8][..8 - tail])?; // Pad to 8 bytes.
        }
        Ok(())
    }

    /// Strings with fixed table indices; the discriminant is the table index.
    #[derive(EnumIter, EnumCount, Clone, Copy)]
    enum InternalString {
        Empty,
        Instructions,
        Cycles,
        Footprint,
        Symbol,
        Timespan,
    }

    /// Returns the text stored in the trace for an internal string.
    fn internal_string(x: InternalString) -> &'static str {
        match x {
            InternalString::Empty => "",
            InternalString::Instructions => "Instructions",
            InternalString::Cycles => "Cycles",
            InternalString::Footprint => "Footprint",
            InternalString::Symbol => "Symbol",
            InternalString::Timespan => "Timespan",
        }
    }

    /// Writes the two header words of an event record.
    ///
    /// Name, category and thread are resolved through the caches first, which may
    /// emit string or thread records ahead of the event itself.
    fn write_event_header(
        c: &mut Caches,
        w: &mut dyn Write,
        e: EventHeader<'_>,
    ) -> std::io::Result<()> {
        let rtype: u64 = 4;
        let rsize = 2 + e.extra_data_size as u64;
        let CacheRef::Idx(name_ref) = c.string_cache.get_ref(w, e.name)?;
        let CacheRef::Idx(category_ref) = c.string_cache.get_ref(w, e.category)?;
        let CacheRef::Idx(thread_ref) = c.thread_cache.get_ref(w, e.pid_tid)?;
        let header = rtype
            | rsize << 4
            | u64::from(e.etype) << 16
            | u64::from(e.nargs) << 20
            | thread_ref << 24
            | category_ref << 32
            | name_ref << 48;
        write_u64(w, header)?;
        write_u64(w, e.timestamp)
    }

    /// Integer division of `a` by `b`, rounded up.
    fn div_up(a: usize, b: usize) -> usize {
        a / b + usize::from(a % b != 0)
    }

    /// Number of 8-byte words needed to hold `x` bytes.
    fn words_for_bytes(x: usize) -> u64 {
        div_up(x, 8) as u64
    }

    // Prints a timestamp in this format: 1234.567890000
    //
    // The text is written right-aligned at the end of `buf`; the returned slice
    // covers exactly the bytes written.
    fn print_timestamp(buf: &mut [u8], mut nanos: u64) -> &[u8] {
        let mut pos = buf.len();
        // Nine fractional digits, least significant first.
        for _ in 0..9 {
            pos -= 1;
            buf[pos] = b'0' + (nanos % 10) as u8;
            nanos /= 10;
        }
        pos -= 1;
        buf[pos] = b'.';
        // Integer part: always at least one digit.
        loop {
            pos -= 1;
            buf[pos] = b'0' + (nanos % 10) as u8;
            nanos /= 10;
            if nanos == 0 {
                break;
            }
        }
        &buf[pos..]
    }

    // Prints a timespan in this format: 1234.567890000,2345.678912340
    //
    // The text is written right-aligned at the end of `buf`, which must be large
    // enough for both timestamps and the comma.
    pub fn print_timespan(buf: &mut [u8], timespan: (u64, u64)) -> &str {
        let end_len = print_timestamp(buf, timespan.1).len();
        let comma = buf.len() - end_len - 1;
        buf[comma] = b',';
        let start_len = print_timestamp(&mut buf[..comma], timespan.0).len();
        std::str::from_utf8(&buf[comma - start_len..])
            .expect("timespan text consists of ASCII digits, '.' and ','")
    }

    /// Writes the four argument records attached to frame events: the
    /// `Instructions`, `Cycles` and `Footprint` counters as 64-bit unsigned
    /// arguments, then `Timespan` as an inline string argument.
    pub fn write_info_args(
        w: &mut dyn Write,
        insns: u64,
        cycles: u64,
        footprint: u64,
        timespan: &str,
    ) -> std::io::Result<()> {
        let counters = [
            (InternalString::Instructions, insns),
            (InternalString::Cycles, cycles),
            (InternalString::Footprint, footprint),
        ];
        for (name, value) in counters {
            // Argument type 4, two words long, named by an internal string.
            write_u64(w, 4 | 2 << 4 | (name as u64) << 16)?;
            write_u64(w, value)?;
        }

        let ts_size = 1 + words_for_bytes(timespan.len());
        write_u64(
            w,
            6 | ts_size << 4
                | (InternalString::Timespan as u64) << 16
                | (timespan.len() as u64) << 32
                | 1 << 47, // The value is stored inline rather than via the string table.
        )?;
        write_string(w, timespan.as_bytes())
    }

    // Number of arguments, total size (not counting strings)
    //
    // The size is in 8-byte words: three two-word counters plus the string
    // argument's header word and its padded payload.
    pub fn info_nargs_size(timespan: &str) -> (u8, usize) {
        (4, 7 + words_for_bytes(timespan.len()) as usize)
    }

    /// Writes the start of a trace: magic number, provider info and provider
    /// section metadata, and one string record per internal string.
    pub fn write_header(w: &mut dyn Write) -> std::io::Result<()> {
        // Magic number.
        write_u64(w, 0x0016547846040010_u64)?;

        let rtype: u64 = 0; // Metadata record.
        let provider_id: u64 = 0; // The only provider.

        // Provider info metadata.
        let mtype: u64 = 1;
        let name = "scylla";
        let name_len = name.len() as u64;
        let rsize = 1 + words_for_bytes(name.len());
        write_u64(
            w,
            rtype | rsize << 4 | mtype << 16 | provider_id << 20 | name_len << 52,
        )?;
        write_string(w, name.as_bytes())?;

        // Provider section metadata.
        let rsize: u64 = 1;
        let mtype: u64 = 2;
        write_u64(w, rtype | rsize << 4 | mtype << 16 | provider_id << 20)?;

        // Internal strings (`Empty` is index 0 and needs no record).
        for i in InternalString::iter().skip(1) {
            write_string_record(w, i as u64, internal_string(i))?;
        }
        Ok(())
    }

    /// Writes a duration-begin event named `symbol` for thread `pid_tid`.
    pub fn write_frame_start(
        w: &mut dyn Write,
        c: &mut Caches,
        timestamp: u64,
        pid_tid: (u64, u64),
        symbol: &str,
    ) -> std::io::Result<()> {
        write_event_header(
            c,
            w,
            EventHeader {
                etype: 2, // Duration start
                nargs: 0,
                timestamp,
                pid_tid,
                category: "Misc",
                name: symbol,
                extra_data_size: 0,
            },
        )
    }

    /// Writes a duration-end event for thread `pid_tid`, carrying the counters
    /// and the `ts_start`/`ts_end` timespan as arguments.
    pub fn write_frame_end(
        w: &mut dyn Write,
        c: &mut Caches,
        timestamp: u64,
        pid_tid: (u64, u64),
        insns: u64,
        cycles: u64,
        footprint: u64,
        ts_start: u64,
        ts_end: u64,
    ) -> std::io::Result<()> {
        let mut buf = [0u8; 48];
        let ts = print_timespan(&mut buf, (ts_start, ts_end));
        let (nargs, extra_data_size) = info_nargs_size(ts);
        write_event_header(
            c,
            w,
            EventHeader {
                etype: 3, // Duration end
                nargs,
                timestamp,
                pid_tid,
                category: "Misc",
                name: "",
                extra_data_size,
            },
        )?;
        write_info_args(w, insns, cycles, footprint, ts)
    }

    /// Writes a duration-complete event named `symbol` for thread `pid_tid`:
    /// the same arguments as [`write_frame_end`], followed by one extra word
    /// holding `end_timestamp`.
    pub fn write_frame_full(
        w: &mut dyn Write,
        c: &mut Caches,
        timestamp: u64,
        pid_tid: (u64, u64),
        insns: u64,
        cycles: u64,
        footprint: u64,
        symbol: &str,
        end_timestamp: u64,
        ts_start: u64,
        ts_end: u64,
    ) -> std::io::Result<()> {
        let mut buf = [0u8; 48];
        let ts = print_timespan(&mut buf, (ts_start, ts_end));
        let (nargs, args_size) = info_nargs_size(ts);
        write_event_header(
            c,
            w,
            EventHeader {
                etype: 4, // Duration complete
                nargs,
                timestamp,
                pid_tid,
                category: "Misc",
                name: symbol,
                // One extra word for the end timestamp written after the arguments.
                extra_data_size: 1 + args_size,
            },
        )?;
        write_info_args(w, insns, cycles, footprint, ts)?;
        write_u64(w, end_timestamp)
    }
}