├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── format.sh ├── readme-assets ├── cudatrace-19-01.png ├── memleak-19-01.png └── memleaks-grafana.png └── src ├── bpf ├── .clang-format ├── .gitignore ├── Makefile └── gpuprobe.bpf.c ├── gpuprobe ├── cuda_error.rs ├── gpuprobe_bandwidth_util.rs ├── gpuprobe_cudatrace.rs ├── gpuprobe_memleak.rs ├── metrics.rs ├── mod.rs ├── process_state.rs └── uprobe_data.rs └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | *.cache 3 | *.skel.rs 4 | *.o 5 | compile_commands.json 6 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.24.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler2" 16 | version = "2.0.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "android-tzdata" 31 | version = "0.1.1" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" 34 | 35 | [[package]] 36 | name = "android_system_properties" 37 | version = "0.1.5" 38 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" 40 | dependencies = [ 41 | "libc", 42 | ] 43 | 44 | [[package]] 45 | name = "anstream" 46 | version = "0.6.15" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" 49 | dependencies = [ 50 | "anstyle", 51 | "anstyle-parse", 52 | "anstyle-query", 53 | "anstyle-wincon", 54 | "colorchoice", 55 | "is_terminal_polyfill", 56 | "utf8parse", 57 | ] 58 | 59 | [[package]] 60 | name = "anstyle" 61 | version = "1.0.8" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" 64 | 65 | [[package]] 66 | name = "anstyle-parse" 67 | version = "0.2.5" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" 70 | dependencies = [ 71 | "utf8parse", 72 | ] 73 | 74 | [[package]] 75 | name = "anstyle-query" 76 | version = "1.1.1" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" 79 | dependencies = [ 80 | "windows-sys 0.52.0", 81 | ] 82 | 83 | [[package]] 84 | name = "anstyle-wincon" 85 | version = "3.0.4" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" 88 | dependencies = [ 89 | "anstyle", 90 | "windows-sys 0.52.0", 91 | ] 92 | 93 | [[package]] 94 | name = "anyhow" 95 | version = "1.0.90" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95" 98 | 99 | [[package]] 100 | name = "async-trait" 101 | version = "0.1.83" 102 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" 104 | dependencies = [ 105 | "proc-macro2", 106 | "quote", 107 | "syn", 108 | ] 109 | 110 | [[package]] 111 | name = "autocfg" 112 | version = "1.4.0" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 115 | 116 | [[package]] 117 | name = "axum" 118 | version = "0.7.7" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "504e3947307ac8326a5437504c517c4b56716c9d98fac0028c2acc7ca47d70ae" 121 | dependencies = [ 122 | "async-trait", 123 | "axum-core", 124 | "bytes", 125 | "futures-util", 126 | "http", 127 | "http-body", 128 | "http-body-util", 129 | "hyper", 130 | "hyper-util", 131 | "itoa", 132 | "matchit", 133 | "memchr", 134 | "mime", 135 | "percent-encoding", 136 | "pin-project-lite", 137 | "rustversion", 138 | "serde", 139 | "serde_json", 140 | "serde_path_to_error", 141 | "serde_urlencoded", 142 | "sync_wrapper 1.0.1", 143 | "tokio", 144 | "tower", 145 | "tower-layer", 146 | "tower-service", 147 | "tracing", 148 | ] 149 | 150 | [[package]] 151 | name = "axum-core" 152 | version = "0.4.5" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" 155 | dependencies = [ 156 | "async-trait", 157 | "bytes", 158 | "futures-util", 159 | "http", 160 | "http-body", 161 | "http-body-util", 162 | "mime", 163 | "pin-project-lite", 164 | "rustversion", 165 | "sync_wrapper 1.0.1", 166 | "tower-layer", 167 | "tower-service", 168 | "tracing", 169 | ] 170 | 171 | [[package]] 172 | name = "backtrace" 173 | version = "0.3.74" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" 176 | dependencies = [ 177 | 
"addr2line", 178 | "cfg-if", 179 | "libc", 180 | "miniz_oxide", 181 | "object", 182 | "rustc-demangle", 183 | "windows-targets", 184 | ] 185 | 186 | [[package]] 187 | name = "bindgen" 188 | version = "0.70.1" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" 191 | dependencies = [ 192 | "bitflags", 193 | "cexpr", 194 | "clang-sys", 195 | "itertools", 196 | "log", 197 | "prettyplease", 198 | "proc-macro2", 199 | "quote", 200 | "regex", 201 | "rustc-hash", 202 | "shlex", 203 | "syn", 204 | ] 205 | 206 | [[package]] 207 | name = "bitflags" 208 | version = "2.6.0" 209 | source = "registry+https://github.com/rust-lang/crates.io-index" 210 | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" 211 | 212 | [[package]] 213 | name = "bumpalo" 214 | version = "3.16.0" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" 217 | 218 | [[package]] 219 | name = "bytes" 220 | version = "1.8.0" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" 223 | 224 | [[package]] 225 | name = "camino" 226 | version = "1.1.9" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" 229 | dependencies = [ 230 | "serde", 231 | ] 232 | 233 | [[package]] 234 | name = "cargo-platform" 235 | version = "0.1.8" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" 238 | dependencies = [ 239 | "serde", 240 | ] 241 | 242 | [[package]] 243 | name = "cargo_metadata" 244 | version = "0.15.4" 245 | source = "registry+https://github.com/rust-lang/crates.io-index" 246 | checksum = 
"eee4243f1f26fc7a42710e7439c149e2b10b05472f88090acce52632f231a73a" 247 | dependencies = [ 248 | "camino", 249 | "cargo-platform", 250 | "semver", 251 | "serde", 252 | "serde_json", 253 | "thiserror", 254 | ] 255 | 256 | [[package]] 257 | name = "cc" 258 | version = "1.1.31" 259 | source = "registry+https://github.com/rust-lang/crates.io-index" 260 | checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" 261 | dependencies = [ 262 | "shlex", 263 | ] 264 | 265 | [[package]] 266 | name = "cexpr" 267 | version = "0.6.0" 268 | source = "registry+https://github.com/rust-lang/crates.io-index" 269 | checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" 270 | dependencies = [ 271 | "nom", 272 | ] 273 | 274 | [[package]] 275 | name = "cfg-if" 276 | version = "1.0.0" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 279 | 280 | [[package]] 281 | name = "cfg_aliases" 282 | version = "0.2.1" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" 285 | 286 | [[package]] 287 | name = "chrono" 288 | version = "0.4.38" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" 291 | dependencies = [ 292 | "android-tzdata", 293 | "iana-time-zone", 294 | "js-sys", 295 | "num-traits", 296 | "wasm-bindgen", 297 | "windows-targets", 298 | ] 299 | 300 | [[package]] 301 | name = "clang-sys" 302 | version = "1.8.1" 303 | source = "registry+https://github.com/rust-lang/crates.io-index" 304 | checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" 305 | dependencies = [ 306 | "glob", 307 | "libc", 308 | "libloading", 309 | ] 310 | 311 | [[package]] 312 | name = "clap" 313 | version = "4.5.20" 314 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" 316 | dependencies = [ 317 | "clap_builder", 318 | "clap_derive", 319 | ] 320 | 321 | [[package]] 322 | name = "clap_builder" 323 | version = "4.5.20" 324 | source = "registry+https://github.com/rust-lang/crates.io-index" 325 | checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" 326 | dependencies = [ 327 | "anstream", 328 | "anstyle", 329 | "clap_lex", 330 | "strsim", 331 | ] 332 | 333 | [[package]] 334 | name = "clap_derive" 335 | version = "4.5.18" 336 | source = "registry+https://github.com/rust-lang/crates.io-index" 337 | checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" 338 | dependencies = [ 339 | "heck", 340 | "proc-macro2", 341 | "quote", 342 | "syn", 343 | ] 344 | 345 | [[package]] 346 | name = "clap_lex" 347 | version = "0.7.2" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" 350 | 351 | [[package]] 352 | name = "colorchoice" 353 | version = "1.0.2" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" 356 | 357 | [[package]] 358 | name = "core-foundation-sys" 359 | version = "0.8.7" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" 362 | 363 | [[package]] 364 | name = "dtoa" 365 | version = "1.0.9" 366 | source = "registry+https://github.com/rust-lang/crates.io-index" 367 | checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" 368 | 369 | [[package]] 370 | name = "either" 371 | version = "1.13.0" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = 
"60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 374 | 375 | [[package]] 376 | name = "errno" 377 | version = "0.3.9" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 380 | dependencies = [ 381 | "libc", 382 | "windows-sys 0.52.0", 383 | ] 384 | 385 | [[package]] 386 | name = "fastrand" 387 | version = "2.1.1" 388 | source = "registry+https://github.com/rust-lang/crates.io-index" 389 | checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" 390 | 391 | [[package]] 392 | name = "fnv" 393 | version = "1.0.7" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 396 | 397 | [[package]] 398 | name = "form_urlencoded" 399 | version = "1.2.1" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" 402 | dependencies = [ 403 | "percent-encoding", 404 | ] 405 | 406 | [[package]] 407 | name = "futures-channel" 408 | version = "0.3.31" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" 411 | dependencies = [ 412 | "futures-core", 413 | ] 414 | 415 | [[package]] 416 | name = "futures-core" 417 | version = "0.3.31" 418 | source = "registry+https://github.com/rust-lang/crates.io-index" 419 | checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 420 | 421 | [[package]] 422 | name = "futures-task" 423 | version = "0.3.31" 424 | source = "registry+https://github.com/rust-lang/crates.io-index" 425 | checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" 426 | 427 | [[package]] 428 | name = "futures-util" 429 | version = "0.3.31" 430 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" 432 | dependencies = [ 433 | "futures-core", 434 | "futures-task", 435 | "pin-project-lite", 436 | "pin-utils", 437 | ] 438 | 439 | [[package]] 440 | name = "gimli" 441 | version = "0.31.1" 442 | source = "registry+https://github.com/rust-lang/crates.io-index" 443 | checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" 444 | 445 | [[package]] 446 | name = "glob" 447 | version = "0.3.1" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 450 | 451 | [[package]] 452 | name = "goblin" 453 | version = "0.9.2" 454 | source = "registry+https://github.com/rust-lang/crates.io-index" 455 | checksum = "53ab3f32d1d77146981dea5d6b1e8fe31eedcb7013e5e00d6ccd1259a4b4d923" 456 | dependencies = [ 457 | "log", 458 | "plain", 459 | "scroll", 460 | ] 461 | 462 | [[package]] 463 | name = "gpu_probe" 464 | version = "0.1.0" 465 | dependencies = [ 466 | "axum", 467 | "chrono", 468 | "clap", 469 | "goblin", 470 | "libbpf-cargo", 471 | "libbpf-rs", 472 | "libc", 473 | "nix", 474 | "proc-maps", 475 | "prometheus-client", 476 | "tokio", 477 | ] 478 | 479 | [[package]] 480 | name = "heck" 481 | version = "0.5.0" 482 | source = "registry+https://github.com/rust-lang/crates.io-index" 483 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 484 | 485 | [[package]] 486 | name = "hermit-abi" 487 | version = "0.3.9" 488 | source = "registry+https://github.com/rust-lang/crates.io-index" 489 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 490 | 491 | [[package]] 492 | name = "http" 493 | version = "1.1.0" 494 | source = "registry+https://github.com/rust-lang/crates.io-index" 495 | checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" 496 | dependencies = [ 
497 | "bytes", 498 | "fnv", 499 | "itoa", 500 | ] 501 | 502 | [[package]] 503 | name = "http-body" 504 | version = "1.0.1" 505 | source = "registry+https://github.com/rust-lang/crates.io-index" 506 | checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" 507 | dependencies = [ 508 | "bytes", 509 | "http", 510 | ] 511 | 512 | [[package]] 513 | name = "http-body-util" 514 | version = "0.1.2" 515 | source = "registry+https://github.com/rust-lang/crates.io-index" 516 | checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" 517 | dependencies = [ 518 | "bytes", 519 | "futures-util", 520 | "http", 521 | "http-body", 522 | "pin-project-lite", 523 | ] 524 | 525 | [[package]] 526 | name = "httparse" 527 | version = "1.9.5" 528 | source = "registry+https://github.com/rust-lang/crates.io-index" 529 | checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" 530 | 531 | [[package]] 532 | name = "httpdate" 533 | version = "1.0.3" 534 | source = "registry+https://github.com/rust-lang/crates.io-index" 535 | checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" 536 | 537 | [[package]] 538 | name = "hyper" 539 | version = "1.5.0" 540 | source = "registry+https://github.com/rust-lang/crates.io-index" 541 | checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" 542 | dependencies = [ 543 | "bytes", 544 | "futures-channel", 545 | "futures-util", 546 | "http", 547 | "http-body", 548 | "httparse", 549 | "httpdate", 550 | "itoa", 551 | "pin-project-lite", 552 | "smallvec", 553 | "tokio", 554 | ] 555 | 556 | [[package]] 557 | name = "hyper-util" 558 | version = "0.1.10" 559 | source = "registry+https://github.com/rust-lang/crates.io-index" 560 | checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" 561 | dependencies = [ 562 | "bytes", 563 | "futures-util", 564 | "http", 565 | "http-body", 566 | "hyper", 567 | "pin-project-lite", 568 | "tokio", 569 | 
"tower-service", 570 | ] 571 | 572 | [[package]] 573 | name = "iana-time-zone" 574 | version = "0.1.61" 575 | source = "registry+https://github.com/rust-lang/crates.io-index" 576 | checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" 577 | dependencies = [ 578 | "android_system_properties", 579 | "core-foundation-sys", 580 | "iana-time-zone-haiku", 581 | "js-sys", 582 | "wasm-bindgen", 583 | "windows-core", 584 | ] 585 | 586 | [[package]] 587 | name = "iana-time-zone-haiku" 588 | version = "0.1.2" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" 591 | dependencies = [ 592 | "cc", 593 | ] 594 | 595 | [[package]] 596 | name = "is_terminal_polyfill" 597 | version = "1.70.1" 598 | source = "registry+https://github.com/rust-lang/crates.io-index" 599 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 600 | 601 | [[package]] 602 | name = "itertools" 603 | version = "0.13.0" 604 | source = "registry+https://github.com/rust-lang/crates.io-index" 605 | checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 606 | dependencies = [ 607 | "either", 608 | ] 609 | 610 | [[package]] 611 | name = "itoa" 612 | version = "1.0.11" 613 | source = "registry+https://github.com/rust-lang/crates.io-index" 614 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 615 | 616 | [[package]] 617 | name = "js-sys" 618 | version = "0.3.72" 619 | source = "registry+https://github.com/rust-lang/crates.io-index" 620 | checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" 621 | dependencies = [ 622 | "wasm-bindgen", 623 | ] 624 | 625 | [[package]] 626 | name = "libbpf-cargo" 627 | version = "0.24.6" 628 | source = "registry+https://github.com/rust-lang/crates.io-index" 629 | checksum = "9e728b7b8be47b3650fdce768075691c69c8798022eb1bab6167cce20f82b10c" 630 | dependencies = [ 
631 | "anyhow", 632 | "cargo_metadata", 633 | "clap", 634 | "libbpf-rs", 635 | "memmap2", 636 | "regex", 637 | "semver", 638 | "serde", 639 | "serde_json", 640 | "tempfile", 641 | ] 642 | 643 | [[package]] 644 | name = "libbpf-rs" 645 | version = "0.24.6" 646 | source = "registry+https://github.com/rust-lang/crates.io-index" 647 | checksum = "73d2e61404e42ba2d97a9acbc24d046cfae978393e21b428e780adbc997504d0" 648 | dependencies = [ 649 | "bitflags", 650 | "libbpf-sys", 651 | "libc", 652 | "vsprintf", 653 | ] 654 | 655 | [[package]] 656 | name = "libbpf-sys" 657 | version = "1.4.5+v1.4.5" 658 | source = "registry+https://github.com/rust-lang/crates.io-index" 659 | checksum = "5cabee52b6f7e73308d6fd4f8e6bbbdcb97670f49f6e581c5897e4d2410b6019" 660 | dependencies = [ 661 | "cc", 662 | "nix", 663 | "pkg-config", 664 | ] 665 | 666 | [[package]] 667 | name = "libc" 668 | version = "0.2.161" 669 | source = "registry+https://github.com/rust-lang/crates.io-index" 670 | checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" 671 | 672 | [[package]] 673 | name = "libloading" 674 | version = "0.8.6" 675 | source = "registry+https://github.com/rust-lang/crates.io-index" 676 | checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" 677 | dependencies = [ 678 | "cfg-if", 679 | "windows-targets", 680 | ] 681 | 682 | [[package]] 683 | name = "libproc" 684 | version = "0.14.10" 685 | source = "registry+https://github.com/rust-lang/crates.io-index" 686 | checksum = "e78a09b56be5adbcad5aa1197371688dc6bb249a26da3bca2011ee2fb987ebfb" 687 | dependencies = [ 688 | "bindgen", 689 | "errno", 690 | "libc", 691 | ] 692 | 693 | [[package]] 694 | name = "linux-raw-sys" 695 | version = "0.4.14" 696 | source = "registry+https://github.com/rust-lang/crates.io-index" 697 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 698 | 699 | [[package]] 700 | name = "lock_api" 701 | version = "0.4.12" 702 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 703 | checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" 704 | dependencies = [ 705 | "autocfg", 706 | "scopeguard", 707 | ] 708 | 709 | [[package]] 710 | name = "log" 711 | version = "0.4.22" 712 | source = "registry+https://github.com/rust-lang/crates.io-index" 713 | checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" 714 | 715 | [[package]] 716 | name = "mach2" 717 | version = "0.4.2" 718 | source = "registry+https://github.com/rust-lang/crates.io-index" 719 | checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" 720 | dependencies = [ 721 | "libc", 722 | ] 723 | 724 | [[package]] 725 | name = "matchit" 726 | version = "0.7.3" 727 | source = "registry+https://github.com/rust-lang/crates.io-index" 728 | checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" 729 | 730 | [[package]] 731 | name = "memchr" 732 | version = "2.7.4" 733 | source = "registry+https://github.com/rust-lang/crates.io-index" 734 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 735 | 736 | [[package]] 737 | name = "memmap2" 738 | version = "0.5.10" 739 | source = "registry+https://github.com/rust-lang/crates.io-index" 740 | checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" 741 | dependencies = [ 742 | "libc", 743 | ] 744 | 745 | [[package]] 746 | name = "mime" 747 | version = "0.3.17" 748 | source = "registry+https://github.com/rust-lang/crates.io-index" 749 | checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" 750 | 751 | [[package]] 752 | name = "minimal-lexical" 753 | version = "0.2.1" 754 | source = "registry+https://github.com/rust-lang/crates.io-index" 755 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 756 | 757 | [[package]] 758 | name = "miniz_oxide" 759 | version = "0.8.0" 760 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 761 | checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" 762 | dependencies = [ 763 | "adler2", 764 | ] 765 | 766 | [[package]] 767 | name = "mio" 768 | version = "1.0.2" 769 | source = "registry+https://github.com/rust-lang/crates.io-index" 770 | checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" 771 | dependencies = [ 772 | "hermit-abi", 773 | "libc", 774 | "wasi", 775 | "windows-sys 0.52.0", 776 | ] 777 | 778 | [[package]] 779 | name = "nix" 780 | version = "0.29.0" 781 | source = "registry+https://github.com/rust-lang/crates.io-index" 782 | checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" 783 | dependencies = [ 784 | "bitflags", 785 | "cfg-if", 786 | "cfg_aliases", 787 | "libc", 788 | ] 789 | 790 | [[package]] 791 | name = "nom" 792 | version = "7.1.3" 793 | source = "registry+https://github.com/rust-lang/crates.io-index" 794 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 795 | dependencies = [ 796 | "memchr", 797 | "minimal-lexical", 798 | ] 799 | 800 | [[package]] 801 | name = "num-traits" 802 | version = "0.2.19" 803 | source = "registry+https://github.com/rust-lang/crates.io-index" 804 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 805 | dependencies = [ 806 | "autocfg", 807 | ] 808 | 809 | [[package]] 810 | name = "object" 811 | version = "0.36.5" 812 | source = "registry+https://github.com/rust-lang/crates.io-index" 813 | checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" 814 | dependencies = [ 815 | "memchr", 816 | ] 817 | 818 | [[package]] 819 | name = "once_cell" 820 | version = "1.20.2" 821 | source = "registry+https://github.com/rust-lang/crates.io-index" 822 | checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" 823 | 824 | [[package]] 825 | name = "parking_lot" 826 | version = "0.12.3" 827 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 828 | checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" 829 | dependencies = [ 830 | "lock_api", 831 | "parking_lot_core", 832 | ] 833 | 834 | [[package]] 835 | name = "parking_lot_core" 836 | version = "0.9.10" 837 | source = "registry+https://github.com/rust-lang/crates.io-index" 838 | checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" 839 | dependencies = [ 840 | "cfg-if", 841 | "libc", 842 | "redox_syscall", 843 | "smallvec", 844 | "windows-targets", 845 | ] 846 | 847 | [[package]] 848 | name = "percent-encoding" 849 | version = "2.3.1" 850 | source = "registry+https://github.com/rust-lang/crates.io-index" 851 | checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 852 | 853 | [[package]] 854 | name = "pin-project-lite" 855 | version = "0.2.15" 856 | source = "registry+https://github.com/rust-lang/crates.io-index" 857 | checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" 858 | 859 | [[package]] 860 | name = "pin-utils" 861 | version = "0.1.0" 862 | source = "registry+https://github.com/rust-lang/crates.io-index" 863 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 864 | 865 | [[package]] 866 | name = "pkg-config" 867 | version = "0.3.31" 868 | source = "registry+https://github.com/rust-lang/crates.io-index" 869 | checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" 870 | 871 | [[package]] 872 | name = "plain" 873 | version = "0.2.3" 874 | source = "registry+https://github.com/rust-lang/crates.io-index" 875 | checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" 876 | 877 | [[package]] 878 | name = "prettyplease" 879 | version = "0.2.25" 880 | source = "registry+https://github.com/rust-lang/crates.io-index" 881 | checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" 882 | dependencies = [ 883 
| "proc-macro2", 884 | "syn", 885 | ] 886 | 887 | [[package]] 888 | name = "proc-macro2" 889 | version = "1.0.88" 890 | source = "registry+https://github.com/rust-lang/crates.io-index" 891 | checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" 892 | dependencies = [ 893 | "unicode-ident", 894 | ] 895 | 896 | [[package]] 897 | name = "proc-maps" 898 | version = "0.4.0" 899 | source = "registry+https://github.com/rust-lang/crates.io-index" 900 | checksum = "3db44c5aa60e193a25fcd93bb9ed27423827e8f118897866f946e2cf936c44fb" 901 | dependencies = [ 902 | "anyhow", 903 | "bindgen", 904 | "libc", 905 | "libproc", 906 | "mach2", 907 | "winapi", 908 | ] 909 | 910 | [[package]] 911 | name = "prometheus-client" 912 | version = "0.22.3" 913 | source = "registry+https://github.com/rust-lang/crates.io-index" 914 | checksum = "504ee9ff529add891127c4827eb481bd69dc0ebc72e9a682e187db4caa60c3ca" 915 | dependencies = [ 916 | "dtoa", 917 | "itoa", 918 | "parking_lot", 919 | "prometheus-client-derive-encode", 920 | ] 921 | 922 | [[package]] 923 | name = "prometheus-client-derive-encode" 924 | version = "0.4.2" 925 | source = "registry+https://github.com/rust-lang/crates.io-index" 926 | checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8" 927 | dependencies = [ 928 | "proc-macro2", 929 | "quote", 930 | "syn", 931 | ] 932 | 933 | [[package]] 934 | name = "quote" 935 | version = "1.0.37" 936 | source = "registry+https://github.com/rust-lang/crates.io-index" 937 | checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" 938 | dependencies = [ 939 | "proc-macro2", 940 | ] 941 | 942 | [[package]] 943 | name = "redox_syscall" 944 | version = "0.5.7" 945 | source = "registry+https://github.com/rust-lang/crates.io-index" 946 | checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" 947 | dependencies = [ 948 | "bitflags", 949 | ] 950 | 951 | [[package]] 952 | name = "regex" 953 | version = "1.11.0" 954 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 955 | checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" 956 | dependencies = [ 957 | "aho-corasick", 958 | "memchr", 959 | "regex-automata", 960 | "regex-syntax", 961 | ] 962 | 963 | [[package]] 964 | name = "regex-automata" 965 | version = "0.4.8" 966 | source = "registry+https://github.com/rust-lang/crates.io-index" 967 | checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" 968 | dependencies = [ 969 | "aho-corasick", 970 | "memchr", 971 | "regex-syntax", 972 | ] 973 | 974 | [[package]] 975 | name = "regex-syntax" 976 | version = "0.8.5" 977 | source = "registry+https://github.com/rust-lang/crates.io-index" 978 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 979 | 980 | [[package]] 981 | name = "rustc-demangle" 982 | version = "0.1.24" 983 | source = "registry+https://github.com/rust-lang/crates.io-index" 984 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" 985 | 986 | [[package]] 987 | name = "rustc-hash" 988 | version = "1.1.0" 989 | source = "registry+https://github.com/rust-lang/crates.io-index" 990 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 991 | 992 | [[package]] 993 | name = "rustix" 994 | version = "0.38.37" 995 | source = "registry+https://github.com/rust-lang/crates.io-index" 996 | checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" 997 | dependencies = [ 998 | "bitflags", 999 | "errno", 1000 | "libc", 1001 | "linux-raw-sys", 1002 | "windows-sys 0.52.0", 1003 | ] 1004 | 1005 | [[package]] 1006 | name = "rustversion" 1007 | version = "1.0.18" 1008 | source = "registry+https://github.com/rust-lang/crates.io-index" 1009 | checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" 1010 | 1011 | [[package]] 1012 | name = "ryu" 1013 | version = "1.0.18" 1014 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1015 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" 1016 | 1017 | [[package]] 1018 | name = "scopeguard" 1019 | version = "1.2.0" 1020 | source = "registry+https://github.com/rust-lang/crates.io-index" 1021 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 1022 | 1023 | [[package]] 1024 | name = "scroll" 1025 | version = "0.12.0" 1026 | source = "registry+https://github.com/rust-lang/crates.io-index" 1027 | checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" 1028 | dependencies = [ 1029 | "scroll_derive", 1030 | ] 1031 | 1032 | [[package]] 1033 | name = "scroll_derive" 1034 | version = "0.12.0" 1035 | source = "registry+https://github.com/rust-lang/crates.io-index" 1036 | checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" 1037 | dependencies = [ 1038 | "proc-macro2", 1039 | "quote", 1040 | "syn", 1041 | ] 1042 | 1043 | [[package]] 1044 | name = "semver" 1045 | version = "1.0.23" 1046 | source = "registry+https://github.com/rust-lang/crates.io-index" 1047 | checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" 1048 | dependencies = [ 1049 | "serde", 1050 | ] 1051 | 1052 | [[package]] 1053 | name = "serde" 1054 | version = "1.0.210" 1055 | source = "registry+https://github.com/rust-lang/crates.io-index" 1056 | checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" 1057 | dependencies = [ 1058 | "serde_derive", 1059 | ] 1060 | 1061 | [[package]] 1062 | name = "serde_derive" 1063 | version = "1.0.210" 1064 | source = "registry+https://github.com/rust-lang/crates.io-index" 1065 | checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" 1066 | dependencies = [ 1067 | "proc-macro2", 1068 | "quote", 1069 | "syn", 1070 | ] 1071 | 1072 | [[package]] 1073 | name = "serde_json" 1074 | version = "1.0.132" 1075 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1076 | checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" 1077 | dependencies = [ 1078 | "itoa", 1079 | "memchr", 1080 | "ryu", 1081 | "serde", 1082 | ] 1083 | 1084 | [[package]] 1085 | name = "serde_path_to_error" 1086 | version = "0.1.16" 1087 | source = "registry+https://github.com/rust-lang/crates.io-index" 1088 | checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" 1089 | dependencies = [ 1090 | "itoa", 1091 | "serde", 1092 | ] 1093 | 1094 | [[package]] 1095 | name = "serde_urlencoded" 1096 | version = "0.7.1" 1097 | source = "registry+https://github.com/rust-lang/crates.io-index" 1098 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" 1099 | dependencies = [ 1100 | "form_urlencoded", 1101 | "itoa", 1102 | "ryu", 1103 | "serde", 1104 | ] 1105 | 1106 | [[package]] 1107 | name = "shlex" 1108 | version = "1.3.0" 1109 | source = "registry+https://github.com/rust-lang/crates.io-index" 1110 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1111 | 1112 | [[package]] 1113 | name = "smallvec" 1114 | version = "1.13.2" 1115 | source = "registry+https://github.com/rust-lang/crates.io-index" 1116 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" 1117 | 1118 | [[package]] 1119 | name = "socket2" 1120 | version = "0.5.7" 1121 | source = "registry+https://github.com/rust-lang/crates.io-index" 1122 | checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" 1123 | dependencies = [ 1124 | "libc", 1125 | "windows-sys 0.52.0", 1126 | ] 1127 | 1128 | [[package]] 1129 | name = "strsim" 1130 | version = "0.11.1" 1131 | source = "registry+https://github.com/rust-lang/crates.io-index" 1132 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1133 | 1134 | [[package]] 1135 | name = "syn" 1136 | version = "2.0.82" 1137 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1138 | checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" 1139 | dependencies = [ 1140 | "proc-macro2", 1141 | "quote", 1142 | "unicode-ident", 1143 | ] 1144 | 1145 | [[package]] 1146 | name = "sync_wrapper" 1147 | version = "0.1.2" 1148 | source = "registry+https://github.com/rust-lang/crates.io-index" 1149 | checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" 1150 | 1151 | [[package]] 1152 | name = "sync_wrapper" 1153 | version = "1.0.1" 1154 | source = "registry+https://github.com/rust-lang/crates.io-index" 1155 | checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" 1156 | 1157 | [[package]] 1158 | name = "tempfile" 1159 | version = "3.13.0" 1160 | source = "registry+https://github.com/rust-lang/crates.io-index" 1161 | checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" 1162 | dependencies = [ 1163 | "cfg-if", 1164 | "fastrand", 1165 | "once_cell", 1166 | "rustix", 1167 | "windows-sys 0.59.0", 1168 | ] 1169 | 1170 | [[package]] 1171 | name = "thiserror" 1172 | version = "1.0.64" 1173 | source = "registry+https://github.com/rust-lang/crates.io-index" 1174 | checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" 1175 | dependencies = [ 1176 | "thiserror-impl", 1177 | ] 1178 | 1179 | [[package]] 1180 | name = "thiserror-impl" 1181 | version = "1.0.64" 1182 | source = "registry+https://github.com/rust-lang/crates.io-index" 1183 | checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" 1184 | dependencies = [ 1185 | "proc-macro2", 1186 | "quote", 1187 | "syn", 1188 | ] 1189 | 1190 | [[package]] 1191 | name = "tokio" 1192 | version = "1.41.0" 1193 | source = "registry+https://github.com/rust-lang/crates.io-index" 1194 | checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" 1195 | dependencies = [ 1196 | "backtrace", 1197 | "libc", 1198 | 
"mio", 1199 | "pin-project-lite", 1200 | "socket2", 1201 | "tokio-macros", 1202 | "windows-sys 0.52.0", 1203 | ] 1204 | 1205 | [[package]] 1206 | name = "tokio-macros" 1207 | version = "2.4.0" 1208 | source = "registry+https://github.com/rust-lang/crates.io-index" 1209 | checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" 1210 | dependencies = [ 1211 | "proc-macro2", 1212 | "quote", 1213 | "syn", 1214 | ] 1215 | 1216 | [[package]] 1217 | name = "tower" 1218 | version = "0.5.1" 1219 | source = "registry+https://github.com/rust-lang/crates.io-index" 1220 | checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" 1221 | dependencies = [ 1222 | "futures-core", 1223 | "futures-util", 1224 | "pin-project-lite", 1225 | "sync_wrapper 0.1.2", 1226 | "tokio", 1227 | "tower-layer", 1228 | "tower-service", 1229 | "tracing", 1230 | ] 1231 | 1232 | [[package]] 1233 | name = "tower-layer" 1234 | version = "0.3.3" 1235 | source = "registry+https://github.com/rust-lang/crates.io-index" 1236 | checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" 1237 | 1238 | [[package]] 1239 | name = "tower-service" 1240 | version = "0.3.3" 1241 | source = "registry+https://github.com/rust-lang/crates.io-index" 1242 | checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" 1243 | 1244 | [[package]] 1245 | name = "tracing" 1246 | version = "0.1.40" 1247 | source = "registry+https://github.com/rust-lang/crates.io-index" 1248 | checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" 1249 | dependencies = [ 1250 | "log", 1251 | "pin-project-lite", 1252 | "tracing-core", 1253 | ] 1254 | 1255 | [[package]] 1256 | name = "tracing-core" 1257 | version = "0.1.32" 1258 | source = "registry+https://github.com/rust-lang/crates.io-index" 1259 | checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" 1260 | dependencies = [ 1261 | "once_cell", 1262 | ] 1263 | 1264 | 
[[package]] 1265 | name = "unicode-ident" 1266 | version = "1.0.13" 1267 | source = "registry+https://github.com/rust-lang/crates.io-index" 1268 | checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" 1269 | 1270 | [[package]] 1271 | name = "utf8parse" 1272 | version = "0.2.2" 1273 | source = "registry+https://github.com/rust-lang/crates.io-index" 1274 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1275 | 1276 | [[package]] 1277 | name = "vsprintf" 1278 | version = "2.0.0" 1279 | source = "registry+https://github.com/rust-lang/crates.io-index" 1280 | checksum = "aec2f81b75ca063294776b4f7e8da71d1d5ae81c2b1b149c8d89969230265d63" 1281 | dependencies = [ 1282 | "cc", 1283 | "libc", 1284 | ] 1285 | 1286 | [[package]] 1287 | name = "wasi" 1288 | version = "0.11.0+wasi-snapshot-preview1" 1289 | source = "registry+https://github.com/rust-lang/crates.io-index" 1290 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1291 | 1292 | [[package]] 1293 | name = "wasm-bindgen" 1294 | version = "0.2.95" 1295 | source = "registry+https://github.com/rust-lang/crates.io-index" 1296 | checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" 1297 | dependencies = [ 1298 | "cfg-if", 1299 | "once_cell", 1300 | "wasm-bindgen-macro", 1301 | ] 1302 | 1303 | [[package]] 1304 | name = "wasm-bindgen-backend" 1305 | version = "0.2.95" 1306 | source = "registry+https://github.com/rust-lang/crates.io-index" 1307 | checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" 1308 | dependencies = [ 1309 | "bumpalo", 1310 | "log", 1311 | "once_cell", 1312 | "proc-macro2", 1313 | "quote", 1314 | "syn", 1315 | "wasm-bindgen-shared", 1316 | ] 1317 | 1318 | [[package]] 1319 | name = "wasm-bindgen-macro" 1320 | version = "0.2.95" 1321 | source = "registry+https://github.com/rust-lang/crates.io-index" 1322 | checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" 
1323 | dependencies = [ 1324 | "quote", 1325 | "wasm-bindgen-macro-support", 1326 | ] 1327 | 1328 | [[package]] 1329 | name = "wasm-bindgen-macro-support" 1330 | version = "0.2.95" 1331 | source = "registry+https://github.com/rust-lang/crates.io-index" 1332 | checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" 1333 | dependencies = [ 1334 | "proc-macro2", 1335 | "quote", 1336 | "syn", 1337 | "wasm-bindgen-backend", 1338 | "wasm-bindgen-shared", 1339 | ] 1340 | 1341 | [[package]] 1342 | name = "wasm-bindgen-shared" 1343 | version = "0.2.95" 1344 | source = "registry+https://github.com/rust-lang/crates.io-index" 1345 | checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" 1346 | 1347 | [[package]] 1348 | name = "winapi" 1349 | version = "0.3.9" 1350 | source = "registry+https://github.com/rust-lang/crates.io-index" 1351 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1352 | dependencies = [ 1353 | "winapi-i686-pc-windows-gnu", 1354 | "winapi-x86_64-pc-windows-gnu", 1355 | ] 1356 | 1357 | [[package]] 1358 | name = "winapi-i686-pc-windows-gnu" 1359 | version = "0.4.0" 1360 | source = "registry+https://github.com/rust-lang/crates.io-index" 1361 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1362 | 1363 | [[package]] 1364 | name = "winapi-x86_64-pc-windows-gnu" 1365 | version = "0.4.0" 1366 | source = "registry+https://github.com/rust-lang/crates.io-index" 1367 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1368 | 1369 | [[package]] 1370 | name = "windows-core" 1371 | version = "0.52.0" 1372 | source = "registry+https://github.com/rust-lang/crates.io-index" 1373 | checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" 1374 | dependencies = [ 1375 | "windows-targets", 1376 | ] 1377 | 1378 | [[package]] 1379 | name = "windows-sys" 1380 | version = "0.52.0" 1381 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1382 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1383 | dependencies = [ 1384 | "windows-targets", 1385 | ] 1386 | 1387 | [[package]] 1388 | name = "windows-sys" 1389 | version = "0.59.0" 1390 | source = "registry+https://github.com/rust-lang/crates.io-index" 1391 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1392 | dependencies = [ 1393 | "windows-targets", 1394 | ] 1395 | 1396 | [[package]] 1397 | name = "windows-targets" 1398 | version = "0.52.6" 1399 | source = "registry+https://github.com/rust-lang/crates.io-index" 1400 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1401 | dependencies = [ 1402 | "windows_aarch64_gnullvm", 1403 | "windows_aarch64_msvc", 1404 | "windows_i686_gnu", 1405 | "windows_i686_gnullvm", 1406 | "windows_i686_msvc", 1407 | "windows_x86_64_gnu", 1408 | "windows_x86_64_gnullvm", 1409 | "windows_x86_64_msvc", 1410 | ] 1411 | 1412 | [[package]] 1413 | name = "windows_aarch64_gnullvm" 1414 | version = "0.52.6" 1415 | source = "registry+https://github.com/rust-lang/crates.io-index" 1416 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1417 | 1418 | [[package]] 1419 | name = "windows_aarch64_msvc" 1420 | version = "0.52.6" 1421 | source = "registry+https://github.com/rust-lang/crates.io-index" 1422 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1423 | 1424 | [[package]] 1425 | name = "windows_i686_gnu" 1426 | version = "0.52.6" 1427 | source = "registry+https://github.com/rust-lang/crates.io-index" 1428 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1429 | 1430 | [[package]] 1431 | name = "windows_i686_gnullvm" 1432 | version = "0.52.6" 1433 | source = "registry+https://github.com/rust-lang/crates.io-index" 1434 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 
1435 | 1436 | [[package]] 1437 | name = "windows_i686_msvc" 1438 | version = "0.52.6" 1439 | source = "registry+https://github.com/rust-lang/crates.io-index" 1440 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1441 | 1442 | [[package]] 1443 | name = "windows_x86_64_gnu" 1444 | version = "0.52.6" 1445 | source = "registry+https://github.com/rust-lang/crates.io-index" 1446 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1447 | 1448 | [[package]] 1449 | name = "windows_x86_64_gnullvm" 1450 | version = "0.52.6" 1451 | source = "registry+https://github.com/rust-lang/crates.io-index" 1452 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1453 | 1454 | [[package]] 1455 | name = "windows_x86_64_msvc" 1456 | version = "0.52.6" 1457 | source = "registry+https://github.com/rust-lang/crates.io-index" 1458 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1459 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "gpu_probe" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | [build-dependencies] 8 | libbpf-cargo = "0.24" 9 | 10 | [dependencies] 11 | clap = { version = "4.5.20", features = ["derive"] } 12 | libbpf-rs = "0.24" 13 | libc = "0.2" 14 | prometheus-client = "0.22.3" 15 | axum = "0.7.7" 16 | tokio = { version = "1.41.0", features = ["rt-multi-thread", "macros"] } 17 | chrono = "0.4.38" 18 | nix = { version = "0.29.0", features = ["process", "signal"] } 19 | goblin = "0.9.2" 20 | proc-maps = "0.4.0" 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 
Copyright (c) 2024 Ethan G. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPUprobe 2 | 3 | GPUprobe *(GPU probe, GP-uprobe)* provides utilities for observability 4 | of GPU behavior via applications' interaction with the CUDA runtime API by leveraging 5 | eBPF uprobes. 6 | 7 | ## Motivation 8 | 9 | GPU monitoring and debugging traditionally require either heavyweight 10 | profiling tools like Nsight (which significantly impact performance), or 11 | high-level monitoring solutions like DCGM (which lack granular insights into 12 | application behavior). This creates a gap for developers who need detailed GPU 13 | runtime information without the overhead of full profiling or code 14 | instrumentation. 
15 | 16 | GPUprobe fills this gap by leveraging [eBPF](https://ebpf.io/) to provide: 17 | 18 | - Real-time memory leak detection at the CUDA runtime level 19 | - Kernel launch frequency tracking 20 | - Memory bandwidth utilization metrics 21 | 22 | The key advantage of GPUprobe's approach is that it requires zero modification 23 | to existing code bases. Whether you're running production ML pipelines, 24 | handling complex GPU computations, or debugging CUDA applications, GPUprobe can 25 | monitor multiple running processes calling the CUDA runtime API and provide 26 | granular insights without any changes to your CUDA kernels or application code. 27 | 28 | By hooking directly into the CUDA runtime API through eBPF uprobes, GPUprobe 29 | maintains a lightweight footprint while still offering detailed observability 30 | into GPU behavior - making it suitable for both development and production 31 | environments. 32 | 33 | This repository provides the source code for `gpuprobe-daemon` - a lightweight 34 | binary that implements these capabilities. While the project is experimental, 35 | it already offers several powerful features described below. 36 | 37 | For information on building and running, refer to the 38 | [short guide](#building-and-running) on the subject. 
39 | 40 | ## Usage 41 | 42 | ``` 43 | Usage: gpu_probe [OPTIONS] 44 | 45 | Options: 46 | --memleak 47 | Attaches memleak program: detects leaking calls to cudaMalloc from the CUDA runtime API 48 | --cudatrace 49 | Attaches the cudatrace program: maintains per-process histograms of cuda kernel launches and their frequencies 50 | --bandwidth-util 51 | Attaches the bandwidth util program: approximates bandwidth utilization of cudaMemcpy 52 | --metrics-addr 53 | Address for the Prometheus metrics endpoint [default: 0.0.0.0:9000] 54 | --display-interval 55 | Interval in seconds for displaying metrics to stdout [default: 5] 56 | --libcudart-path 57 | The path of the libcudart.so dynamic lib that is monitored [default: /usr/local/cuda/lib64/libcudart.so] 58 | -h, --help 59 | Print help 60 | -V, --version 61 | Print version 62 | ``` 63 | 64 | ## Intended use-case 65 | 66 | Metrics are exported in [OpenMetrics](https://github.com/prometheus/OpenMetrics/blob/main/specification/OpenMetrics.md) 67 | format via an http handler, which is intended to be scraped by Prometheus. This 68 | allows for seamless integration with your favorite observability stack, e.g. 69 | Grafana. 70 | 71 | ![Simple `memleak` visualization in Grafana](readme-assets/memleak-19-01.png) 72 | 73 | `memleak:` memory maps displayed for a process' memory allocations in real-time 74 | alongside an aggregate seen in orange representing the process' total CUDA 75 | memory utilization. 76 | 77 | ![Simple `cudatrace` visualization in Grafana](readme-assets/cudatrace-19-01.png) 78 | 79 | `cudatrace:` kernel launches made by a process shown in real time, with kernel 80 | names resolved for better readability 81 | 82 | These metrics are also displayed periodically to stdout. 
83 | 84 | ``` 85 | 2024-12-12 16:32:46 86 | 87 | num_successful_mallocs: 6 88 | num_failed_mallocs: 0 89 | num_successful_frees: 2 90 | num_failed_frees: 0 91 | per-process memory maps: 92 | process 365159 93 | 0x0000793a44000000: 8000000 Bytes 94 | 0x0000793a48c00000: 8000000 Bytes 95 | 0x0000793a49400000: 8000000 Bytes 96 | process 365306 97 | 0x000078fd20000000: 8000000 Bytes 98 | 0x000078fd24c00000: 0 Bytes 99 | 0x000078fd25400000: 0 Bytes 100 | 101 | total kernel launches: 1490 102 | pid: 365306 103 | 0x5823e39efa50 (unknown kernel) -> 10 104 | 0x5823e39efb30 (unknown kernel) -> 10 105 | pid: 365159 106 | 0x5de98f9fba50 (_Z27optimized_convolution_part1PdS_i) -> 735 107 | 0x5de98f9fbb30 (_Z27optimized_convolution_part2PdS_i) -> 735 108 | 109 | ``` 110 | 111 | The various features are opt-in via command-line arguments passed to the 112 | program at launch. 113 | 114 | **E.g.** running `gpu_probe --memleak` will only attach the uprobes needed for 115 | the memleak feature, and only display/export relevant metrics. 116 | 117 | ## Memleak feature 118 | 119 | This utility correlates a call to `cudaFree()` with the associated call to 120 | `cudaMalloc()`, allowing for a measurement of the number of leaked bytes 121 | related to a CUDA virtual address. 122 | 123 | ## CudaTrace feature 124 | 125 | This utility keeps stats on the launched kernels and number of times that they 126 | were launched as a pair `(func_addr, count)`. It can be thought of and 127 | aggregated as a histogram of the frequencies of kernel launches. 128 | 129 | ## Bandwidth utilization feature 130 | 131 | This feature approximates bandwidth utilization on the bus between host and 132 | device as a function of execution time and size of a `cudaMemcpy()` call. 
133 | 134 | This is computed naively with: `throughput = count / (end - start)` 135 | 136 | Note that this only plausibly works for host-to-device *(H2D)* and 137 | device-to-host *(D2H)* copies, as only these calls provide any guarantees of 138 | synchronicity. 139 | 140 | This feature is not yet exported. Below you will find a sample output of an 141 | older iteration that simply wrote the results to stdout. 142 | 143 | ``` 144 | GPUprobe bandwidth_util utility 145 | ======================== 146 | 147 | 148 | Traced 1 cudaMemcpy calls 149 | H2D 3045740550.87548 bytes/sec for 0.00263 secs 150 | ======================== 151 | 152 | Traced 2 cudaMemcpy calls 153 | H2D 2981869117.56429 bytes/sec for 0.00268 secs 154 | D2H 3039108386.38160 bytes/sec for 0.00263 secs 155 | ======================== 156 | ``` 157 | 158 | ## Building and Running 159 | 160 | An eBPF compatible Linux kernel version is required for running GPUprobe, as 161 | well as `bpftool`. 162 | 163 | A `vmlinux.h` file is required for the build process, which can be created 164 | by executing the following command from the project root: 165 | 166 | ```bash 167 | bpftool btf dump file /sys/kernel/btf/vmlinux format c > src/bpf/vmlinux.h 168 | ``` 169 | 170 | Following that, you should be able to build the project. 171 | 172 | ```bash 173 | cargo build 174 | ``` 175 | 176 | Root privileges are required to run the project due to its attaching of eBPF 177 | uprobes. 
178 | 179 | ```bash 180 | sudo ./gpu_probe # --options 181 | ``` 182 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::ffi::OsStr; 3 | use std::path::PathBuf; 4 | 5 | use libbpf_cargo::SkeletonBuilder; 6 | 7 | const SRC: &str = "src/bpf/gpuprobe.bpf.c"; 8 | 9 | fn main() { 10 | let skel_out = PathBuf::from( 11 | env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR must be set in build script"), 12 | ) 13 | .join("src") 14 | .join("bpf") 15 | .join("gpuprobe.skel.rs"); 16 | 17 | SkeletonBuilder::new() 18 | .source(SRC) 19 | .build_and_generate(&skel_out) 20 | .unwrap(); 21 | println!("cargo:rerun-if-changed={SRC}"); 22 | } 23 | -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | rustfmt --edition 2021 src/*.rs 4 | rustfmt --edition 2021 src/gpuprobe/*.rs 5 | -------------------------------------------------------------------------------- /readme-assets/cudatrace-19-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUprobe/gpuprobe-daemon/7dfb22e90d68f0a7bb81538a58a3b9ba5731097b/readme-assets/cudatrace-19-01.png -------------------------------------------------------------------------------- /readme-assets/memleak-19-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUprobe/gpuprobe-daemon/7dfb22e90d68f0a7bb81538a58a3b9ba5731097b/readme-assets/memleak-19-01.png -------------------------------------------------------------------------------- /readme-assets/memleaks-grafana.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GPUprobe/gpuprobe-daemon/7dfb22e90d68f0a7bb81538a58a3b9ba5731097b/readme-assets/memleaks-grafana.png -------------------------------------------------------------------------------- /src/bpf/.clang-format: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: GPL-2.0 2 | # 3 | # clang-format configuration file. Intended for clang-format >= 11. 4 | # 5 | # For more information, see: 6 | # 7 | # Documentation/dev-tools/clang-format.rst 8 | # https://clang.llvm.org/docs/ClangFormat.html 9 | # https://clang.llvm.org/docs/ClangFormatStyleOptions.html 10 | # 11 | --- 12 | AccessModifierOffset: -4 13 | AlignAfterOpenBracket: Align 14 | AlignConsecutiveAssignments: false 15 | AlignConsecutiveDeclarations: false 16 | AlignEscapedNewlines: Left 17 | AlignOperands: true 18 | AlignTrailingComments: false 19 | AllowAllParametersOfDeclarationOnNextLine: false 20 | AllowShortBlocksOnASingleLine: false 21 | AllowShortCaseLabelsOnASingleLine: false 22 | AllowShortFunctionsOnASingleLine: None 23 | AllowShortIfStatementsOnASingleLine: false 24 | AllowShortLoopsOnASingleLine: false 25 | AlwaysBreakAfterDefinitionReturnType: None 26 | AlwaysBreakAfterReturnType: None 27 | AlwaysBreakBeforeMultilineStrings: false 28 | AlwaysBreakTemplateDeclarations: false 29 | BinPackArguments: true 30 | BinPackParameters: true 31 | BraceWrapping: 32 | AfterClass: false 33 | AfterControlStatement: false 34 | AfterEnum: false 35 | AfterFunction: true 36 | AfterNamespace: true 37 | AfterObjCDeclaration: false 38 | AfterStruct: false 39 | AfterUnion: false 40 | AfterExternBlock: false 41 | BeforeCatch: false 42 | BeforeElse: false 43 | IndentBraces: false 44 | SplitEmptyFunction: true 45 | SplitEmptyRecord: true 46 | SplitEmptyNamespace: true 47 | BreakBeforeBinaryOperators: None 48 | BreakBeforeBraces: Custom 49 | BreakBeforeInheritanceComma: false 50 | BreakBeforeTernaryOperators: false 51 | 
BreakConstructorInitializersBeforeComma: false 52 | BreakConstructorInitializers: BeforeComma 53 | BreakAfterJavaFieldAnnotations: false 54 | BreakStringLiterals: false 55 | ColumnLimit: 80 56 | CommentPragmas: '^ IWYU pragma:' 57 | CompactNamespaces: false 58 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 59 | ConstructorInitializerIndentWidth: 8 60 | ContinuationIndentWidth: 8 61 | Cpp11BracedListStyle: false 62 | DerivePointerAlignment: false 63 | DisableFormat: false 64 | ExperimentalAutoDetectBinPacking: false 65 | FixNamespaceComments: false 66 | 67 | # Taken from: 68 | # git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ tools/ \ 69 | # | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \ 70 | # | LC_ALL=C sort -u 71 | ForEachMacros: 72 | - '__ata_qc_for_each' 73 | - '__bio_for_each_bvec' 74 | - '__bio_for_each_segment' 75 | - '__evlist__for_each_entry' 76 | - '__evlist__for_each_entry_continue' 77 | - '__evlist__for_each_entry_from' 78 | - '__evlist__for_each_entry_reverse' 79 | - '__evlist__for_each_entry_safe' 80 | - '__for_each_mem_range' 81 | - '__for_each_mem_range_rev' 82 | - '__for_each_thread' 83 | - '__hlist_for_each_rcu' 84 | - '__map__for_each_symbol_by_name' 85 | - '__pci_bus_for_each_res0' 86 | - '__pci_bus_for_each_res1' 87 | - '__pci_dev_for_each_res0' 88 | - '__pci_dev_for_each_res1' 89 | - '__perf_evlist__for_each_entry' 90 | - '__perf_evlist__for_each_entry_reverse' 91 | - '__perf_evlist__for_each_entry_safe' 92 | - '__rq_for_each_bio' 93 | - '__shost_for_each_device' 94 | - '__sym_for_each' 95 | - 'apei_estatus_for_each_section' 96 | - 'ata_for_each_dev' 97 | - 'ata_for_each_link' 98 | - 'ata_qc_for_each' 99 | - 'ata_qc_for_each_raw' 100 | - 'ata_qc_for_each_with_internal' 101 | - 'ax25_for_each' 102 | - 'ax25_uid_for_each' 103 | - 'bio_for_each_bvec' 104 | - 'bio_for_each_bvec_all' 105 | - 'bio_for_each_folio_all' 106 | - 'bio_for_each_integrity_vec' 107 | - 'bio_for_each_segment' 108 | - 
'bio_for_each_segment_all' 109 | - 'bio_list_for_each' 110 | - 'bip_for_each_vec' 111 | - 'bond_for_each_slave' 112 | - 'bond_for_each_slave_rcu' 113 | - 'bpf_for_each' 114 | - 'bpf_for_each_reg_in_vstate' 115 | - 'bpf_for_each_reg_in_vstate_mask' 116 | - 'bpf_for_each_spilled_reg' 117 | - 'bpf_object__for_each_map' 118 | - 'bpf_object__for_each_program' 119 | - 'btree_for_each_safe128' 120 | - 'btree_for_each_safe32' 121 | - 'btree_for_each_safe64' 122 | - 'btree_for_each_safel' 123 | - 'card_for_each_dev' 124 | - 'cgroup_taskset_for_each' 125 | - 'cgroup_taskset_for_each_leader' 126 | - 'cpu_aggr_map__for_each_idx' 127 | - 'cpufreq_for_each_efficient_entry_idx' 128 | - 'cpufreq_for_each_entry' 129 | - 'cpufreq_for_each_entry_idx' 130 | - 'cpufreq_for_each_valid_entry' 131 | - 'cpufreq_for_each_valid_entry_idx' 132 | - 'css_for_each_child' 133 | - 'css_for_each_descendant_post' 134 | - 'css_for_each_descendant_pre' 135 | - 'damon_for_each_region' 136 | - 'damon_for_each_region_from' 137 | - 'damon_for_each_region_safe' 138 | - 'damon_for_each_scheme' 139 | - 'damon_for_each_scheme_safe' 140 | - 'damon_for_each_target' 141 | - 'damon_for_each_target_safe' 142 | - 'damos_for_each_filter' 143 | - 'damos_for_each_filter_safe' 144 | - 'data__for_each_file' 145 | - 'data__for_each_file_new' 146 | - 'data__for_each_file_start' 147 | - 'device_for_each_child_node' 148 | - 'displayid_iter_for_each' 149 | - 'dma_fence_array_for_each' 150 | - 'dma_fence_chain_for_each' 151 | - 'dma_fence_unwrap_for_each' 152 | - 'dma_resv_for_each_fence' 153 | - 'dma_resv_for_each_fence_unlocked' 154 | - 'do_for_each_ftrace_op' 155 | - 'drm_atomic_crtc_for_each_plane' 156 | - 'drm_atomic_crtc_state_for_each_plane' 157 | - 'drm_atomic_crtc_state_for_each_plane_state' 158 | - 'drm_atomic_for_each_plane_damage' 159 | - 'drm_client_for_each_connector_iter' 160 | - 'drm_client_for_each_modeset' 161 | - 'drm_connector_for_each_possible_encoder' 162 | - 'drm_exec_for_each_locked_object' 163 | - 
'drm_exec_for_each_locked_object_reverse' 164 | - 'drm_for_each_bridge_in_chain' 165 | - 'drm_for_each_connector_iter' 166 | - 'drm_for_each_crtc' 167 | - 'drm_for_each_crtc_reverse' 168 | - 'drm_for_each_encoder' 169 | - 'drm_for_each_encoder_mask' 170 | - 'drm_for_each_fb' 171 | - 'drm_for_each_legacy_plane' 172 | - 'drm_for_each_plane' 173 | - 'drm_for_each_plane_mask' 174 | - 'drm_for_each_privobj' 175 | - 'drm_gem_for_each_gpuva' 176 | - 'drm_gem_for_each_gpuva_safe' 177 | - 'drm_gpuva_for_each_op' 178 | - 'drm_gpuva_for_each_op_from_reverse' 179 | - 'drm_gpuva_for_each_op_safe' 180 | - 'drm_gpuvm_for_each_va' 181 | - 'drm_gpuvm_for_each_va_range' 182 | - 'drm_gpuvm_for_each_va_range_safe' 183 | - 'drm_gpuvm_for_each_va_safe' 184 | - 'drm_mm_for_each_hole' 185 | - 'drm_mm_for_each_node' 186 | - 'drm_mm_for_each_node_in_range' 187 | - 'drm_mm_for_each_node_safe' 188 | - 'dsa_switch_for_each_available_port' 189 | - 'dsa_switch_for_each_cpu_port' 190 | - 'dsa_switch_for_each_cpu_port_continue_reverse' 191 | - 'dsa_switch_for_each_port' 192 | - 'dsa_switch_for_each_port_continue_reverse' 193 | - 'dsa_switch_for_each_port_safe' 194 | - 'dsa_switch_for_each_user_port' 195 | - 'dsa_tree_for_each_cpu_port' 196 | - 'dsa_tree_for_each_user_port' 197 | - 'dsa_tree_for_each_user_port_continue_reverse' 198 | - 'dso__for_each_symbol' 199 | - 'dsos__for_each_with_build_id' 200 | - 'elf_hash_for_each_possible' 201 | - 'elf_symtab__for_each_symbol' 202 | - 'evlist__for_each_cpu' 203 | - 'evlist__for_each_entry' 204 | - 'evlist__for_each_entry_continue' 205 | - 'evlist__for_each_entry_from' 206 | - 'evlist__for_each_entry_reverse' 207 | - 'evlist__for_each_entry_safe' 208 | - 'flow_action_for_each' 209 | - 'for_each_acpi_consumer_dev' 210 | - 'for_each_acpi_dev_match' 211 | - 'for_each_active_dev_scope' 212 | - 'for_each_active_drhd_unit' 213 | - 'for_each_active_iommu' 214 | - 'for_each_active_route' 215 | - 'for_each_aggr_pgid' 216 | - 'for_each_and_bit' 217 | - 
'for_each_andnot_bit' 218 | - 'for_each_available_child_of_node' 219 | - 'for_each_bench' 220 | - 'for_each_bio' 221 | - 'for_each_board_func_rsrc' 222 | - 'for_each_btf_ext_rec' 223 | - 'for_each_btf_ext_sec' 224 | - 'for_each_bvec' 225 | - 'for_each_card_auxs' 226 | - 'for_each_card_auxs_safe' 227 | - 'for_each_card_components' 228 | - 'for_each_card_dapms' 229 | - 'for_each_card_pre_auxs' 230 | - 'for_each_card_prelinks' 231 | - 'for_each_card_rtds' 232 | - 'for_each_card_rtds_safe' 233 | - 'for_each_card_widgets' 234 | - 'for_each_card_widgets_safe' 235 | - 'for_each_cgroup_storage_type' 236 | - 'for_each_child_of_node' 237 | - 'for_each_clear_bit' 238 | - 'for_each_clear_bit_from' 239 | - 'for_each_clear_bitrange' 240 | - 'for_each_clear_bitrange_from' 241 | - 'for_each_cmd' 242 | - 'for_each_cmsghdr' 243 | - 'for_each_collection' 244 | - 'for_each_comp_order' 245 | - 'for_each_compatible_node' 246 | - 'for_each_component_dais' 247 | - 'for_each_component_dais_safe' 248 | - 'for_each_conduit' 249 | - 'for_each_console' 250 | - 'for_each_console_srcu' 251 | - 'for_each_cpu' 252 | - 'for_each_cpu_and' 253 | - 'for_each_cpu_andnot' 254 | - 'for_each_cpu_or' 255 | - 'for_each_cpu_wrap' 256 | - 'for_each_dapm_widgets' 257 | - 'for_each_dedup_cand' 258 | - 'for_each_dev_addr' 259 | - 'for_each_dev_scope' 260 | - 'for_each_dma_cap_mask' 261 | - 'for_each_dpcm_be' 262 | - 'for_each_dpcm_be_rollback' 263 | - 'for_each_dpcm_be_safe' 264 | - 'for_each_dpcm_fe' 265 | - 'for_each_drhd_unit' 266 | - 'for_each_dss_dev' 267 | - 'for_each_efi_memory_desc' 268 | - 'for_each_efi_memory_desc_in_map' 269 | - 'for_each_element' 270 | - 'for_each_element_extid' 271 | - 'for_each_element_id' 272 | - 'for_each_endpoint_of_node' 273 | - 'for_each_event' 274 | - 'for_each_event_tps' 275 | - 'for_each_evictable_lru' 276 | - 'for_each_fib6_node_rt_rcu' 277 | - 'for_each_fib6_walker_rt' 278 | - 'for_each_free_mem_pfn_range_in_zone' 279 | - 'for_each_free_mem_pfn_range_in_zone_from' 280 | - 
'for_each_free_mem_range' 281 | - 'for_each_free_mem_range_reverse' 282 | - 'for_each_func_rsrc' 283 | - 'for_each_gpiochip_node' 284 | - 'for_each_group_evsel' 285 | - 'for_each_group_evsel_head' 286 | - 'for_each_group_member' 287 | - 'for_each_group_member_head' 288 | - 'for_each_hstate' 289 | - 'for_each_if' 290 | - 'for_each_inject_fn' 291 | - 'for_each_insn' 292 | - 'for_each_insn_prefix' 293 | - 'for_each_intid' 294 | - 'for_each_iommu' 295 | - 'for_each_ip_tunnel_rcu' 296 | - 'for_each_irq_nr' 297 | - 'for_each_lang' 298 | - 'for_each_link_codecs' 299 | - 'for_each_link_cpus' 300 | - 'for_each_link_platforms' 301 | - 'for_each_lru' 302 | - 'for_each_matching_node' 303 | - 'for_each_matching_node_and_match' 304 | - 'for_each_media_entity_data_link' 305 | - 'for_each_mem_pfn_range' 306 | - 'for_each_mem_range' 307 | - 'for_each_mem_range_rev' 308 | - 'for_each_mem_region' 309 | - 'for_each_member' 310 | - 'for_each_memory' 311 | - 'for_each_migratetype_order' 312 | - 'for_each_missing_reg' 313 | - 'for_each_mle_subelement' 314 | - 'for_each_mod_mem_type' 315 | - 'for_each_net' 316 | - 'for_each_net_continue_reverse' 317 | - 'for_each_net_rcu' 318 | - 'for_each_netdev' 319 | - 'for_each_netdev_continue' 320 | - 'for_each_netdev_continue_rcu' 321 | - 'for_each_netdev_continue_reverse' 322 | - 'for_each_netdev_dump' 323 | - 'for_each_netdev_feature' 324 | - 'for_each_netdev_in_bond_rcu' 325 | - 'for_each_netdev_rcu' 326 | - 'for_each_netdev_reverse' 327 | - 'for_each_netdev_safe' 328 | - 'for_each_new_connector_in_state' 329 | - 'for_each_new_crtc_in_state' 330 | - 'for_each_new_mst_mgr_in_state' 331 | - 'for_each_new_plane_in_state' 332 | - 'for_each_new_plane_in_state_reverse' 333 | - 'for_each_new_private_obj_in_state' 334 | - 'for_each_new_reg' 335 | - 'for_each_node' 336 | - 'for_each_node_by_name' 337 | - 'for_each_node_by_type' 338 | - 'for_each_node_mask' 339 | - 'for_each_node_state' 340 | - 'for_each_node_with_cpus' 341 | - 
'for_each_node_with_property' 342 | - 'for_each_nonreserved_multicast_dest_pgid' 343 | - 'for_each_numa_hop_mask' 344 | - 'for_each_of_allnodes' 345 | - 'for_each_of_allnodes_from' 346 | - 'for_each_of_cpu_node' 347 | - 'for_each_of_pci_range' 348 | - 'for_each_old_connector_in_state' 349 | - 'for_each_old_crtc_in_state' 350 | - 'for_each_old_mst_mgr_in_state' 351 | - 'for_each_old_plane_in_state' 352 | - 'for_each_old_private_obj_in_state' 353 | - 'for_each_oldnew_connector_in_state' 354 | - 'for_each_oldnew_crtc_in_state' 355 | - 'for_each_oldnew_mst_mgr_in_state' 356 | - 'for_each_oldnew_plane_in_state' 357 | - 'for_each_oldnew_plane_in_state_reverse' 358 | - 'for_each_oldnew_private_obj_in_state' 359 | - 'for_each_online_cpu' 360 | - 'for_each_online_node' 361 | - 'for_each_online_pgdat' 362 | - 'for_each_or_bit' 363 | - 'for_each_path' 364 | - 'for_each_pci_bridge' 365 | - 'for_each_pci_dev' 366 | - 'for_each_pcm_streams' 367 | - 'for_each_physmem_range' 368 | - 'for_each_populated_zone' 369 | - 'for_each_possible_cpu' 370 | - 'for_each_present_blessed_reg' 371 | - 'for_each_present_cpu' 372 | - 'for_each_prime_number' 373 | - 'for_each_prime_number_from' 374 | - 'for_each_probe_cache_entry' 375 | - 'for_each_process' 376 | - 'for_each_process_thread' 377 | - 'for_each_prop_codec_conf' 378 | - 'for_each_prop_dai_codec' 379 | - 'for_each_prop_dai_cpu' 380 | - 'for_each_prop_dlc_codecs' 381 | - 'for_each_prop_dlc_cpus' 382 | - 'for_each_prop_dlc_platforms' 383 | - 'for_each_property_of_node' 384 | - 'for_each_reg' 385 | - 'for_each_reg_filtered' 386 | - 'for_each_reloc' 387 | - 'for_each_reloc_from' 388 | - 'for_each_requested_gpio' 389 | - 'for_each_requested_gpio_in_range' 390 | - 'for_each_reserved_mem_range' 391 | - 'for_each_reserved_mem_region' 392 | - 'for_each_rtd_codec_dais' 393 | - 'for_each_rtd_components' 394 | - 'for_each_rtd_cpu_dais' 395 | - 'for_each_rtd_dais' 396 | - 'for_each_sband_iftype_data' 397 | - 'for_each_script' 398 | - 'for_each_sec' 
399 | - 'for_each_set_bit' 400 | - 'for_each_set_bit_from' 401 | - 'for_each_set_bit_wrap' 402 | - 'for_each_set_bitrange' 403 | - 'for_each_set_bitrange_from' 404 | - 'for_each_set_clump8' 405 | - 'for_each_sg' 406 | - 'for_each_sg_dma_page' 407 | - 'for_each_sg_page' 408 | - 'for_each_sgtable_dma_page' 409 | - 'for_each_sgtable_dma_sg' 410 | - 'for_each_sgtable_page' 411 | - 'for_each_sgtable_sg' 412 | - 'for_each_sibling_event' 413 | - 'for_each_sta_active_link' 414 | - 'for_each_subelement' 415 | - 'for_each_subelement_extid' 416 | - 'for_each_subelement_id' 417 | - 'for_each_sublist' 418 | - 'for_each_subsystem' 419 | - 'for_each_supported_activate_fn' 420 | - 'for_each_supported_inject_fn' 421 | - 'for_each_sym' 422 | - 'for_each_test' 423 | - 'for_each_thread' 424 | - 'for_each_token' 425 | - 'for_each_unicast_dest_pgid' 426 | - 'for_each_valid_link' 427 | - 'for_each_vif_active_link' 428 | - 'for_each_vma' 429 | - 'for_each_vma_range' 430 | - 'for_each_vsi' 431 | - 'for_each_wakeup_source' 432 | - 'for_each_zone' 433 | - 'for_each_zone_zonelist' 434 | - 'for_each_zone_zonelist_nodemask' 435 | - 'func_for_each_insn' 436 | - 'fwnode_for_each_available_child_node' 437 | - 'fwnode_for_each_child_node' 438 | - 'fwnode_for_each_parent_node' 439 | - 'fwnode_graph_for_each_endpoint' 440 | - 'gadget_for_each_ep' 441 | - 'genradix_for_each' 442 | - 'genradix_for_each_from' 443 | - 'genradix_for_each_reverse' 444 | - 'hash_for_each' 445 | - 'hash_for_each_possible' 446 | - 'hash_for_each_possible_rcu' 447 | - 'hash_for_each_possible_rcu_notrace' 448 | - 'hash_for_each_possible_safe' 449 | - 'hash_for_each_rcu' 450 | - 'hash_for_each_safe' 451 | - 'hashmap__for_each_entry' 452 | - 'hashmap__for_each_entry_safe' 453 | - 'hashmap__for_each_key_entry' 454 | - 'hashmap__for_each_key_entry_safe' 455 | - 'hctx_for_each_ctx' 456 | - 'hists__for_each_format' 457 | - 'hists__for_each_sort_list' 458 | - 'hlist_bl_for_each_entry' 459 | - 'hlist_bl_for_each_entry_rcu' 460 | - 
'hlist_bl_for_each_entry_safe' 461 | - 'hlist_for_each' 462 | - 'hlist_for_each_entry' 463 | - 'hlist_for_each_entry_continue' 464 | - 'hlist_for_each_entry_continue_rcu' 465 | - 'hlist_for_each_entry_continue_rcu_bh' 466 | - 'hlist_for_each_entry_from' 467 | - 'hlist_for_each_entry_from_rcu' 468 | - 'hlist_for_each_entry_rcu' 469 | - 'hlist_for_each_entry_rcu_bh' 470 | - 'hlist_for_each_entry_rcu_notrace' 471 | - 'hlist_for_each_entry_safe' 472 | - 'hlist_for_each_entry_srcu' 473 | - 'hlist_for_each_safe' 474 | - 'hlist_nulls_for_each_entry' 475 | - 'hlist_nulls_for_each_entry_from' 476 | - 'hlist_nulls_for_each_entry_rcu' 477 | - 'hlist_nulls_for_each_entry_safe' 478 | - 'i3c_bus_for_each_i2cdev' 479 | - 'i3c_bus_for_each_i3cdev' 480 | - 'idr_for_each_entry' 481 | - 'idr_for_each_entry_continue' 482 | - 'idr_for_each_entry_continue_ul' 483 | - 'idr_for_each_entry_ul' 484 | - 'in_dev_for_each_ifa_rcu' 485 | - 'in_dev_for_each_ifa_rtnl' 486 | - 'inet_bind_bucket_for_each' 487 | - 'interval_tree_for_each_span' 488 | - 'intlist__for_each_entry' 489 | - 'intlist__for_each_entry_safe' 490 | - 'kcore_copy__for_each_phdr' 491 | - 'key_for_each' 492 | - 'key_for_each_safe' 493 | - 'klp_for_each_func' 494 | - 'klp_for_each_func_safe' 495 | - 'klp_for_each_func_static' 496 | - 'klp_for_each_object' 497 | - 'klp_for_each_object_safe' 498 | - 'klp_for_each_object_static' 499 | - 'kunit_suite_for_each_test_case' 500 | - 'kvm_for_each_memslot' 501 | - 'kvm_for_each_memslot_in_gfn_range' 502 | - 'kvm_for_each_vcpu' 503 | - 'libbpf_nla_for_each_attr' 504 | - 'list_for_each' 505 | - 'list_for_each_codec' 506 | - 'list_for_each_codec_safe' 507 | - 'list_for_each_continue' 508 | - 'list_for_each_entry' 509 | - 'list_for_each_entry_continue' 510 | - 'list_for_each_entry_continue_rcu' 511 | - 'list_for_each_entry_continue_reverse' 512 | - 'list_for_each_entry_from' 513 | - 'list_for_each_entry_from_rcu' 514 | - 'list_for_each_entry_from_reverse' 515 | - 'list_for_each_entry_lockless' 
516 | - 'list_for_each_entry_rcu' 517 | - 'list_for_each_entry_reverse' 518 | - 'list_for_each_entry_safe' 519 | - 'list_for_each_entry_safe_continue' 520 | - 'list_for_each_entry_safe_from' 521 | - 'list_for_each_entry_safe_reverse' 522 | - 'list_for_each_entry_srcu' 523 | - 'list_for_each_from' 524 | - 'list_for_each_prev' 525 | - 'list_for_each_prev_safe' 526 | - 'list_for_each_rcu' 527 | - 'list_for_each_reverse' 528 | - 'list_for_each_safe' 529 | - 'llist_for_each' 530 | - 'llist_for_each_entry' 531 | - 'llist_for_each_entry_safe' 532 | - 'llist_for_each_safe' 533 | - 'lwq_for_each_safe' 534 | - 'map__for_each_symbol' 535 | - 'map__for_each_symbol_by_name' 536 | - 'maps__for_each_entry' 537 | - 'maps__for_each_entry_safe' 538 | - 'mas_for_each' 539 | - 'mci_for_each_dimm' 540 | - 'media_device_for_each_entity' 541 | - 'media_device_for_each_intf' 542 | - 'media_device_for_each_link' 543 | - 'media_device_for_each_pad' 544 | - 'media_entity_for_each_pad' 545 | - 'media_pipeline_for_each_entity' 546 | - 'media_pipeline_for_each_pad' 547 | - 'mlx5_lag_for_each_peer_mdev' 548 | - 'msi_domain_for_each_desc' 549 | - 'msi_for_each_desc' 550 | - 'mt_for_each' 551 | - 'nanddev_io_for_each_page' 552 | - 'netdev_for_each_lower_dev' 553 | - 'netdev_for_each_lower_private' 554 | - 'netdev_for_each_lower_private_rcu' 555 | - 'netdev_for_each_mc_addr' 556 | - 'netdev_for_each_synced_mc_addr' 557 | - 'netdev_for_each_synced_uc_addr' 558 | - 'netdev_for_each_uc_addr' 559 | - 'netdev_for_each_upper_dev_rcu' 560 | - 'netdev_hw_addr_list_for_each' 561 | - 'nft_rule_for_each_expr' 562 | - 'nla_for_each_attr' 563 | - 'nla_for_each_nested' 564 | - 'nlmsg_for_each_attr' 565 | - 'nlmsg_for_each_msg' 566 | - 'nr_neigh_for_each' 567 | - 'nr_neigh_for_each_safe' 568 | - 'nr_node_for_each' 569 | - 'nr_node_for_each_safe' 570 | - 'of_for_each_phandle' 571 | - 'of_property_for_each_string' 572 | - 'of_property_for_each_u32' 573 | - 'pci_bus_for_each_resource' 574 | - 
'pci_dev_for_each_resource' 575 | - 'pcl_for_each_chunk' 576 | - 'pcl_for_each_segment' 577 | - 'pcm_for_each_format' 578 | - 'perf_config_items__for_each_entry' 579 | - 'perf_config_sections__for_each_entry' 580 | - 'perf_config_set__for_each_entry' 581 | - 'perf_cpu_map__for_each_cpu' 582 | - 'perf_cpu_map__for_each_idx' 583 | - 'perf_evlist__for_each_entry' 584 | - 'perf_evlist__for_each_entry_reverse' 585 | - 'perf_evlist__for_each_entry_safe' 586 | - 'perf_evlist__for_each_evsel' 587 | - 'perf_evlist__for_each_mmap' 588 | - 'perf_hpp_list__for_each_format' 589 | - 'perf_hpp_list__for_each_format_safe' 590 | - 'perf_hpp_list__for_each_sort_list' 591 | - 'perf_hpp_list__for_each_sort_list_safe' 592 | - 'perf_tool_event__for_each_event' 593 | - 'plist_for_each' 594 | - 'plist_for_each_continue' 595 | - 'plist_for_each_entry' 596 | - 'plist_for_each_entry_continue' 597 | - 'plist_for_each_entry_safe' 598 | - 'plist_for_each_safe' 599 | - 'pnp_for_each_card' 600 | - 'pnp_for_each_dev' 601 | - 'protocol_for_each_card' 602 | - 'protocol_for_each_dev' 603 | - 'queue_for_each_hw_ctx' 604 | - 'radix_tree_for_each_slot' 605 | - 'radix_tree_for_each_tagged' 606 | - 'rb_for_each' 607 | - 'rbtree_postorder_for_each_entry_safe' 608 | - 'rdma_for_each_block' 609 | - 'rdma_for_each_port' 610 | - 'rdma_umem_for_each_dma_block' 611 | - 'resort_rb__for_each_entry' 612 | - 'resource_list_for_each_entry' 613 | - 'resource_list_for_each_entry_safe' 614 | - 'rhl_for_each_entry_rcu' 615 | - 'rhl_for_each_rcu' 616 | - 'rht_for_each' 617 | - 'rht_for_each_entry' 618 | - 'rht_for_each_entry_from' 619 | - 'rht_for_each_entry_rcu' 620 | - 'rht_for_each_entry_rcu_from' 621 | - 'rht_for_each_entry_safe' 622 | - 'rht_for_each_from' 623 | - 'rht_for_each_rcu' 624 | - 'rht_for_each_rcu_from' 625 | - 'rq_for_each_bvec' 626 | - 'rq_for_each_segment' 627 | - 'rq_list_for_each' 628 | - 'rq_list_for_each_safe' 629 | - 'sample_read_group__for_each' 630 | - 'scsi_for_each_prot_sg' 631 | - 
'scsi_for_each_sg' 632 | - 'sctp_for_each_hentry' 633 | - 'sctp_skb_for_each' 634 | - 'sec_for_each_insn' 635 | - 'sec_for_each_insn_continue' 636 | - 'sec_for_each_insn_from' 637 | - 'sec_for_each_sym' 638 | - 'shdma_for_each_chan' 639 | - 'shost_for_each_device' 640 | - 'sk_for_each' 641 | - 'sk_for_each_bound' 642 | - 'sk_for_each_bound_bhash2' 643 | - 'sk_for_each_entry_offset_rcu' 644 | - 'sk_for_each_from' 645 | - 'sk_for_each_rcu' 646 | - 'sk_for_each_safe' 647 | - 'sk_nulls_for_each' 648 | - 'sk_nulls_for_each_from' 649 | - 'sk_nulls_for_each_rcu' 650 | - 'snd_array_for_each' 651 | - 'snd_pcm_group_for_each_entry' 652 | - 'snd_soc_dapm_widget_for_each_path' 653 | - 'snd_soc_dapm_widget_for_each_path_safe' 654 | - 'snd_soc_dapm_widget_for_each_sink_path' 655 | - 'snd_soc_dapm_widget_for_each_source_path' 656 | - 'strlist__for_each_entry' 657 | - 'strlist__for_each_entry_safe' 658 | - 'sym_for_each_insn' 659 | - 'sym_for_each_insn_continue_reverse' 660 | - 'symbols__for_each_entry' 661 | - 'tb_property_for_each' 662 | - 'tcf_act_for_each_action' 663 | - 'tcf_exts_for_each_action' 664 | - 'ttm_resource_manager_for_each_res' 665 | - 'twsk_for_each_bound_bhash2' 666 | - 'udp_portaddr_for_each_entry' 667 | - 'udp_portaddr_for_each_entry_rcu' 668 | - 'usb_hub_for_each_child' 669 | - 'v4l2_device_for_each_subdev' 670 | - 'v4l2_m2m_for_each_dst_buf' 671 | - 'v4l2_m2m_for_each_dst_buf_safe' 672 | - 'v4l2_m2m_for_each_src_buf' 673 | - 'v4l2_m2m_for_each_src_buf_safe' 674 | - 'virtio_device_for_each_vq' 675 | - 'while_for_each_ftrace_op' 676 | - 'xa_for_each' 677 | - 'xa_for_each_marked' 678 | - 'xa_for_each_range' 679 | - 'xa_for_each_start' 680 | - 'xas_for_each' 681 | - 'xas_for_each_conflict' 682 | - 'xas_for_each_marked' 683 | - 'xbc_array_for_each_value' 684 | - 'xbc_for_each_key_value' 685 | - 'xbc_node_for_each_array_value' 686 | - 'xbc_node_for_each_child' 687 | - 'xbc_node_for_each_key_value' 688 | - 'xbc_node_for_each_subkey' 689 | - 'zorro_for_each_dev' 690 
| 691 | IncludeBlocks: Preserve 692 | IncludeCategories: 693 | - Regex: '.*' 694 | Priority: 1 695 | IncludeIsMainRegex: '(Test)?$' 696 | IndentCaseLabels: false 697 | IndentGotoLabels: false 698 | IndentPPDirectives: None 699 | IndentWidth: 8 700 | IndentWrappedFunctionNames: false 701 | JavaScriptQuotes: Leave 702 | JavaScriptWrapImports: true 703 | KeepEmptyLinesAtTheStartOfBlocks: false 704 | MacroBlockBegin: '' 705 | MacroBlockEnd: '' 706 | MaxEmptyLinesToKeep: 1 707 | NamespaceIndentation: None 708 | ObjCBinPackProtocolList: Auto 709 | ObjCBlockIndentWidth: 8 710 | ObjCSpaceAfterProperty: true 711 | ObjCSpaceBeforeProtocolList: true 712 | 713 | # Taken from git's rules 714 | PenaltyBreakAssignment: 10 715 | PenaltyBreakBeforeFirstCallParameter: 30 716 | PenaltyBreakComment: 10 717 | PenaltyBreakFirstLessLess: 0 718 | PenaltyBreakString: 10 719 | PenaltyExcessCharacter: 100 720 | PenaltyReturnTypeOnItsOwnLine: 60 721 | 722 | PointerAlignment: Right 723 | ReflowComments: false 724 | SortIncludes: false 725 | SortUsingDeclarations: false 726 | SpaceAfterCStyleCast: false 727 | SpaceAfterTemplateKeyword: true 728 | SpaceBeforeAssignmentOperators: true 729 | SpaceBeforeCtorInitializerColon: true 730 | SpaceBeforeInheritanceColon: true 731 | SpaceBeforeParens: ControlStatementsExceptForEachMacros 732 | SpaceBeforeRangeBasedForLoopColon: true 733 | SpaceInEmptyParentheses: false 734 | SpacesBeforeTrailingComments: 1 735 | SpacesInAngles: false 736 | SpacesInContainerLiterals: false 737 | SpacesInCStyleCastParentheses: false 738 | SpacesInParentheses: false 739 | SpacesInSquareBrackets: false 740 | Standard: Cpp03 741 | TabWidth: 8 742 | UseTab: Always 743 | ... 
744 | -------------------------------------------------------------------------------- /src/bpf/.gitignore: -------------------------------------------------------------------------------- 1 | *.skel.h 2 | *.skel.rs 3 | vmlinux.h 4 | compile_commands.json 5 | -------------------------------------------------------------------------------- /src/bpf/Makefile: -------------------------------------------------------------------------------- 1 | CLANG ?= clang 2 | BPFTOOL ?= bpftool 3 | ARCH := x86 4 | 5 | CFLAGS := -O2 -g --target=bpf -Wno-compare-distinct-pointer-types 6 | CFLAGS += -D__TARGET_ARCH_$(ARCH) 7 | INCLUDES := -Iinclude -I. -I../../../libbpf/src/root/usr/include 8 | 9 | all: gpuprobe.bpf.o gpuprobe.skel.h 10 | 11 | %.bpf.o: %.bpf.c vmlinux.h 12 | @$(CLANG) $(CFLAGS) $(INCLUDES) -c $< -o $@ 13 | 14 | gpuprobe.skel.h: gpuprobe.bpf.o 15 | @$(BPFTOOL) gen skeleton gpuprobe.bpf.o name gpuprobe > gpuprobe.skel.h 16 | 17 | vmlinux.h: 18 | @$(BPFTOOL) btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h 19 | 20 | # .PHONY: clean 21 | clean: 22 | @rm -rf *.bpf.o *.skel.h 23 | -------------------------------------------------------------------------------- /src/bpf/gpuprobe.bpf.c: -------------------------------------------------------------------------------- 1 | #include "vmlinux.h" 2 | #include 3 | #include 4 | #include 5 | 6 | enum memleak_event_t { 7 | CUDA_MALLOC = 0, 8 | CUDA_FREE, 9 | }; 10 | 11 | /** 12 | * Wraps the arguments passed to `cudaMalloc` or `cudaFree`, and return code, 13 | * and some metadata 14 | */ 15 | struct memleak_event { 16 | __u64 start; 17 | __u64 end; 18 | __u64 device_addr; 19 | __u64 size; 20 | __u32 pid; 21 | __s32 ret; 22 | enum memleak_event_t event_type; 23 | }; 24 | 25 | /** 26 | * Several required data and metadata fields of a memleak event can only be 27 | * read from the initial uprobe, but are needed in order to emit events from 28 | * the uretprobe on return. 
We map pid to the started event, which is then 29 | * read and cleared from the uretprobe. This works under the assumption that 30 | * only one instance of either `cudaMalloc` or `cudaFree` is being executed at 31 | * a time per process. 32 | */ 33 | struct { 34 | __uint(type, BPF_MAP_TYPE_HASH); 35 | __type(key, __u32); 36 | __type(value, struct memleak_event); 37 | __uint(max_entries, 1024); 38 | } memleak_pid_to_event SEC(".maps"); 39 | 40 | struct { 41 | __uint(type, BPF_MAP_TYPE_HASH); 42 | __type(key, __u32); 43 | __type(value, __u64); 44 | __uint(max_entries, 1024); 45 | } memleak_pid_to_dev_ptr SEC(".maps"); 46 | 47 | /** 48 | * Queue of memleak events that are updated from eBPF space, then dequeued 49 | * and processed from userspace by the GPUprobe daemon. 50 | */ 51 | struct { 52 | __uint(type, BPF_MAP_TYPE_QUEUE); 53 | __uint(key_size, 0); 54 | __type(value, struct memleak_event); 55 | __uint(max_entries, 1024); 56 | } memleak_events_queue SEC(".maps"); 57 | 58 | /// uprobe triggered by a call to `cudaMalloc` 59 | SEC("uprobe/cudaMalloc") 60 | int memleak_cuda_malloc(struct pt_regs *ctx) 61 | { 62 | struct memleak_event e = { 0 }; 63 | __u64 dev_ptr; 64 | __u32 pid, key0 = 0; 65 | 66 | e.size = (__u64)PT_REGS_PARM2(ctx); 67 | dev_ptr = (__u64) PT_REGS_PARM1(ctx); 68 | pid = (__u32)bpf_get_current_pid_tgid(); 69 | 70 | e.event_type = CUDA_MALLOC; 71 | e.start = bpf_ktime_get_ns(); 72 | e.pid = pid; 73 | 74 | if (bpf_map_update_elem(&memleak_pid_to_event, &pid, &e, 0)) { 75 | return -1; 76 | } 77 | 78 | return bpf_map_update_elem(&memleak_pid_to_dev_ptr, &pid, &dev_ptr, 0); 79 | } 80 | 81 | /// uretprobe triggered when `cudaMalloc` returns 82 | SEC("uretprobe/cudaMalloc") 83 | int memleak_cuda_malloc_ret(struct pt_regs *ctx) 84 | { 85 | __s32 cuda_malloc_ret; 86 | __u32 pid; 87 | struct memleak_event *e; 88 | __u64 dev_ptr, map_ptr; 89 | 90 | cuda_malloc_ret = (__s32)PT_REGS_RC(ctx); 91 | pid = (__u32)bpf_get_current_pid_tgid(); 92 | 93 | e = 
bpf_map_lookup_elem(&memleak_pid_to_event, &pid); 94 | if (!e) { 95 | return -1; 96 | } 97 | 98 | e->ret = cuda_malloc_ret; 99 | 100 | // lookup the value of `devPtr` passed to `cudaMalloc` by this process 101 | map_ptr = (__u64)bpf_map_lookup_elem(&memleak_pid_to_dev_ptr, &pid); 102 | if (!map_ptr) { 103 | return -1; 104 | } 105 | dev_ptr = *(__u64*)map_ptr; 106 | 107 | // read the value copied into `*devPtr` by `cudaMalloc` from userspace 108 | if (bpf_probe_read_user(&e->device_addr, sizeof(void *), (void*)dev_ptr)) { 109 | return -1; 110 | } 111 | 112 | e->end = bpf_ktime_get_ns(); 113 | 114 | return bpf_map_push_elem(&memleak_events_queue, e, 0); 115 | } 116 | 117 | /// uprobe triggered by a call to `cudaFree` 118 | SEC("uprobe/cudaFree") 119 | int trace_cuda_free(struct pt_regs *ctx) 120 | { 121 | struct memleak_event e = { 0 }; 122 | 123 | e.event_type = CUDA_FREE; 124 | e.pid = (u32)bpf_get_current_pid_tgid(); 125 | e.start = bpf_ktime_get_ns(); 126 | e.device_addr = (__u64)PT_REGS_PARM1(ctx); 127 | 128 | return bpf_map_update_elem(&memleak_pid_to_event, &e.pid, &e, 0); 129 | } 130 | 131 | /// uretprobe triggered when `cudaFree` returns 132 | SEC("uretprobe/cudaFree") 133 | int trace_cuda_free_ret(struct pt_regs *ctx) 134 | { 135 | __s32 cuda_free_ret; 136 | __u32 pid; 137 | struct memleak_event *e; 138 | 139 | pid = (__u32)bpf_get_current_pid_tgid(); 140 | 141 | e = (struct memleak_event *)bpf_map_lookup_elem(&memleak_pid_to_event, 142 | &pid); 143 | if (!e) { 144 | return -1; 145 | } 146 | 147 | e->end = bpf_ktime_get_ns(); 148 | e->ret = PT_REGS_RC(ctx); 149 | 150 | return bpf_map_push_elem(&memleak_events_queue, e, 0); 151 | } 152 | 153 | struct kernel_launch_event { 154 | __u64 timestamp; 155 | __u64 kern_offset; 156 | __u32 pid; 157 | }; 158 | 159 | struct { 160 | __uint(type, BPF_MAP_TYPE_QUEUE); 161 | __uint(key_size, 0); 162 | __type(value, struct kernel_launch_event); 163 | __uint(max_entries, 10240); 164 | } kernel_launch_events_queue 
SEC(".maps"); 165 | 166 | SEC("uprobe/cudaKernelLaunch") 167 | int trace_cuda_launch_kernel(struct pt_regs *ctx) 168 | { 169 | struct kernel_launch_event e; 170 | void *kern_offset; 171 | 172 | e.timestamp = bpf_ktime_get_ns(); 173 | e.kern_offset = (__u64)PT_REGS_PARM1(ctx); 174 | e.pid = (__u32)bpf_get_current_pid_tgid(); 175 | 176 | return bpf_map_push_elem(&kernel_launch_events_queue, &e, 0); 177 | } 178 | 179 | /** 180 | * redefinition of `enum cudaMemcpyKind` in driver_types.h. 181 | */ 182 | enum memcpy_kind { 183 | D2D = 0, // device to device 184 | D2H = 1, // device to host 185 | H2D = 2, // host to device 186 | H2H = 3, // host to host 187 | DEFAULT = 4, // inferred from pointer type at runtime 188 | }; 189 | 190 | struct cuda_memcpy { 191 | __u64 start_time; 192 | __u64 end_time; 193 | __u64 dst; 194 | __u64 src; 195 | __u64 count; 196 | enum memcpy_kind kind; 197 | }; 198 | 199 | /** 200 | * Maps a pid to an information on an incomplete cudaMemcpy call. This is 201 | * needed because we cannot access the input arguments inside of the uretprobe. 202 | */ 203 | struct { 204 | __uint(type, BPF_MAP_TYPE_HASH); 205 | __type(key, __u32); 206 | __type(value, struct cuda_memcpy); 207 | __uint(max_entries, 10240); 208 | } pid_to_memcpy SEC(".maps"); 209 | 210 | /** 211 | * Queue of successful cudaMemcpy calls to be processed from userspace. 212 | */ 213 | struct { 214 | __uint(type, BPF_MAP_TYPE_QUEUE); 215 | __uint(key_size, 0); 216 | __uint(value_size, sizeof(struct cuda_memcpy)); 217 | __uint(max_entries, 10240); 218 | } successful_cuda_memcpy_q SEC(".maps"); 219 | 220 | /** 221 | * This function exhibits synchronous behavior in MOST cases as specified by 222 | * Nvidia documentation. 
It is under the assumption that this call is 223 | * synchronous that we compute the average memory bandwidth of a transfer as: 224 | * avg_throughput = count / (end - start) 225 | */ 226 | SEC("uprobe/cudaMemcpy") 227 | int trace_cuda_memcpy(struct pt_regs *ctx) 228 | { 229 | __u64 dst = PT_REGS_PARM1(ctx); 230 | __u64 src = PT_REGS_PARM2(ctx); 231 | __u64 count = PT_REGS_PARM3(ctx); 232 | enum memcpy_kind kind = PT_REGS_PARM4(ctx); 233 | __u32 pid = (__u32)bpf_get_current_pid_tgid(); 234 | 235 | /* no host-side synchronization is performed in the D2D case - as a result, 236 | * we cannot compute average throughput using information available from 237 | * this uprobe. If the DEFAULT argument is passed, we cannot make any 238 | * assumption on the direction of the transfer */ 239 | if (kind == D2D || kind == DEFAULT) 240 | return 0; 241 | 242 | struct cuda_memcpy in_progress_memcpy = { .start_time = 243 | bpf_ktime_get_ns(), 244 | .dst = dst, 245 | .src = src, 246 | .count = count, 247 | .kind = kind }; 248 | 249 | if (bpf_map_update_elem(&pid_to_memcpy, &pid, &in_progress_memcpy, 0)) { 250 | return -1; 251 | } 252 | 253 | return 0; 254 | } 255 | 256 | SEC("uretprobe/cudaMemcpy") 257 | int trace_cuda_memcpy_ret(struct pt_regs *ctx) 258 | { 259 | __u32 ret = PT_REGS_RC(ctx); 260 | __u32 pid = (__u32)bpf_get_current_pid_tgid(); 261 | struct cuda_memcpy *exited_memcpy; 262 | 263 | if (ret) { 264 | return -1; 265 | } 266 | 267 | exited_memcpy = 268 | (struct cuda_memcpy *)bpf_map_lookup_elem(&pid_to_memcpy, &pid); 269 | if (!exited_memcpy) { 270 | return -1; 271 | } 272 | 273 | if (bpf_map_delete_elem(&pid_to_memcpy, &pid)) { 274 | return -1; 275 | } 276 | 277 | exited_memcpy->end_time = bpf_ktime_get_ns(); 278 | if (bpf_map_push_elem(&successful_cuda_memcpy_q, exited_memcpy, 0)) { 279 | return -1; 280 | } 281 | 282 | return 0; 283 | } 284 | 285 | char LICENSE[] SEC("license") = "GPL"; 286 | 
-------------------------------------------------------------------------------- /src/gpuprobe/cuda_error.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use super::GpuprobeError; 4 | 5 | /// Defines a subset of the enum values `enum cudaError_t` found in 6 | /// driver_types.h 7 | #[repr(i32)] 8 | #[derive(std::cmp::PartialEq, std::cmp::Eq, std::hash::Hash, Clone, Copy, Debug)] 9 | pub enum CudaErrorT { 10 | CudaSuccess, 11 | CudaErrorInvalidValue, 12 | CudaErrorMemoryAllocation, 13 | UnsupportedErrorType, 14 | } 15 | 16 | impl CudaErrorT { 17 | pub fn from_int(value: i32) -> Self { 18 | match value { 19 | 0 => CudaErrorT::CudaSuccess, 20 | 1 => CudaErrorT::CudaErrorInvalidValue, 21 | 2 => CudaErrorT::CudaErrorMemoryAllocation, 22 | _ => CudaErrorT::UnsupportedErrorType, 23 | } 24 | } 25 | } 26 | 27 | #[derive(std::cmp::PartialEq, std::cmp::Eq, std::hash::Hash, Clone, Copy, Debug)] 28 | pub enum EventType { 29 | CudaMalloc, 30 | CudaFree, 31 | } 32 | 33 | impl ToString for EventType { 34 | fn to_string(&self) -> String { 35 | match self { 36 | Self::CudaMalloc => "cudaMalloc", 37 | Self::CudaFree => "cudaFree", 38 | } 39 | .to_string() 40 | } 41 | } 42 | 43 | pub struct CudaError { 44 | pub pid: u32, 45 | pub event: EventType, 46 | pub error: CudaErrorT, 47 | } 48 | 49 | /// Maintains per-process error histograms 50 | pub struct CudaErrorState { 51 | pub error_histogram: HashMap>, 52 | } 53 | 54 | impl CudaErrorState { 55 | pub fn new() -> Self { 56 | CudaErrorState { 57 | error_histogram: HashMap::new(), 58 | } 59 | } 60 | 61 | pub fn insert(&mut self, err: CudaError) -> Result<(), GpuprobeError> { 62 | if !self.error_histogram.contains_key(&err.pid) { 63 | self.error_histogram.insert(err.pid, HashMap::new()); 64 | } 65 | 66 | let hist = match self.error_histogram.get_mut(&err.pid) { 67 | Some(hist) => hist, 68 | None => panic!("no entry for {} in histogram", err.pid), 69 | }; 70 | 71 
| let count_ref = match hist.get_mut(&(err.event, err.error)) { 72 | Some(r) => r, 73 | None => { 74 | hist.insert((err.event, err.error), 1); 75 | return Ok(()); 76 | } 77 | }; 78 | *count_ref += 1; 79 | Ok(()) 80 | } 81 | } 82 | 83 | impl std::fmt::Display for CudaErrorState { 84 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 85 | writeln!(f, "per-process error histograms:")?; 86 | 87 | if self.error_histogram.is_empty() { 88 | println!("\tNo errors to report"); 89 | } else { 90 | for (pid, hash_map) in self.error_histogram.iter() { 91 | writeln!(f, "process {}", pid)?; 92 | for ((event, error), count) in hash_map { 93 | writeln!(f, "\t({}[{:?}]): {}", event.to_string(), error, count)?; 94 | } 95 | } 96 | } 97 | 98 | Ok(()) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/gpuprobe/gpuprobe_bandwidth_util.rs: -------------------------------------------------------------------------------- 1 | mod gpuprobe { 2 | include!(concat!( 3 | env!("CARGO_MANIFEST_DIR"), 4 | "/src/bpf/gpuprobe.skel.rs" 5 | )); 6 | } 7 | 8 | use libbpf_rs::{MapCore, UprobeOpts}; 9 | 10 | use super::uprobe_data::BandwidthUtilData; 11 | use super::{Gpuprobe, GpuprobeError}; 12 | 13 | impl Gpuprobe { 14 | /// attaches uprobes for the bandwidth util program, or returns an error on 15 | /// failure 16 | pub fn attach_bandwidth_util_uprobes(&mut self) -> Result<(), GpuprobeError> { 17 | let opts_memcpy = UprobeOpts { 18 | func_name: "cudaMemcpy".to_string(), 19 | retprobe: false, 20 | ..Default::default() 21 | }; 22 | 23 | let opts_memcpy_ret = UprobeOpts { 24 | func_name: "cudaMemcpy".to_string(), 25 | retprobe: true, 26 | ..Default::default() 27 | }; 28 | 29 | let cuda_memcpy_uprobe_link = self 30 | .skel 31 | .skel 32 | .progs 33 | .trace_cuda_memcpy 34 | .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_memcpy) 35 | .map_err(|_| GpuprobeError::AttachError)?; 36 | 37 | let cuda_memcpy_uretprobe_link = 
self 38 | .skel 39 | .skel 40 | .progs 41 | .trace_cuda_memcpy_ret 42 | .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_memcpy_ret) 43 | .map_err(|_| GpuprobeError::AttachError)?; 44 | 45 | self.links.links.trace_cuda_memcpy = Some(cuda_memcpy_uprobe_link); 46 | self.links.links.trace_cuda_memcpy_ret = Some(cuda_memcpy_uretprobe_link); 47 | Ok(()) 48 | } 49 | 50 | /// Copies all cudaMemcpy calls out of the queue and returns them as a Vec, 51 | /// or returns a GpuProbeError on failure 52 | pub fn collect_data_bandwidth_util(&self) -> Result { 53 | let mut output: Vec = Vec::new(); 54 | let key: [u8; 0] = []; // key size must be zero for BPF_MAP_TYPE_QUEUE 55 | // `lookup_and_delete` calls. 56 | 57 | while let Ok(opt) = self 58 | .skel 59 | .skel 60 | .maps 61 | .successful_cuda_memcpy_q 62 | .lookup_and_delete(&key) 63 | { 64 | match opt { 65 | Some(bytes) => match CudaMemcpy::from_bytes(&bytes) { 66 | Some(valid_instance) => output.push(valid_instance), 67 | None => { 68 | return Err(GpuprobeError::RuntimeError( 69 | "alloc conversion failure".to_string(), 70 | )) 71 | } 72 | }, 73 | None => { 74 | // This case suggests that a queue entry has no data. If 75 | // this occurs, it indicates a problem with the eBPF 76 | // program, so we return a runtime error. 77 | return Err(GpuprobeError::RuntimeError( 78 | "Found None data for key during lookup".to_string(), 79 | )); 80 | } 81 | } 82 | } 83 | 84 | Ok(BandwidthUtilData { 85 | cuda_memcpys: output, 86 | }) 87 | } 88 | } 89 | 90 | pub struct CudaMemcpy { 91 | pub start_time: u64, 92 | pub end_time: u64, 93 | pub dst: u64, 94 | pub src: u64, 95 | pub count: u64, 96 | pub memcpy_kind: u32, 97 | } 98 | 99 | impl CudaMemcpy { 100 | /// Constructs a CudaMemcpy struct from a raw byte array and returns it, or 101 | /// None if the byte array is invalid. 
102 | pub fn from_bytes(bytes: &[u8]) -> Option { 103 | if bytes.len() < std::mem::size_of::() { 104 | return None; 105 | } 106 | // This is safe if: 107 | // 1. The byte array contains valid data for this struct 108 | // 2. The byte array is at least as large as the struct 109 | unsafe { Some(std::ptr::read_unaligned(bytes.as_ptr() as *const Self)) } 110 | } 111 | 112 | /// Returns a human readable version of the `kind` parameter passed to 113 | /// `cudaMemcpy` 114 | pub fn kind_to_str(&self) -> String { 115 | match self.memcpy_kind { 116 | 0 => "H2H".to_string(), 117 | 1 => "H2D".to_string(), 118 | 2 => "D2H".to_string(), 119 | 3 => "D2D".to_string(), 120 | 4 => "DEF".to_string(), 121 | _ => "INVALID KIND".to_string(), 122 | } 123 | } 124 | 125 | pub fn compute_bandwidth_util(&self) -> Option { 126 | if self.start_time >= self.end_time { 127 | return None; 128 | } 129 | 130 | let delta = (self.end_time - self.start_time) as f64; 131 | let nanos_per_second = 1e9; 132 | let res = (self.count as f64) / delta * nanos_per_second; 133 | Some(res) 134 | } 135 | } 136 | 137 | impl std::fmt::Display for CudaMemcpy { 138 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 139 | writeln!(f, "{{")?; 140 | writeln!(f, "\tstart_time: {}", self.start_time)?; 141 | writeln!(f, "\tend_time: {}", self.end_time)?; 142 | writeln!(f, "\tdst: {:x}", self.dst)?; 143 | writeln!(f, "\tsrc: {:x}", self.dst)?; 144 | writeln!(f, "\tcount: {}", self.count)?; 145 | writeln!(f, "\tkind: {}", self.memcpy_kind)?; 146 | writeln!(f, "}}") 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/gpuprobe/gpuprobe_cudatrace.rs: -------------------------------------------------------------------------------- 1 | mod gpuprobe { 2 | include!(concat!( 3 | env!("CARGO_MANIFEST_DIR"), 4 | "/src/bpf/gpuprobe.skel.rs" 5 | )); 6 | } 7 | 8 | use std::collections::{BTreeMap, HashMap}; 9 | 10 | use libbpf_rs::{MapCore, UprobeOpts}; 11 | 12 | 
use super::{Gpuprobe, GpuprobeError}; 13 | 14 | /// contains implementations for the cudatrace program 15 | impl Gpuprobe { 16 | /// attaches uprobes for the cudatrace program, or returns an error on 17 | /// failure 18 | pub fn attach_cudatrace_uprobes(&mut self) -> Result<(), GpuprobeError> { 19 | let opts_launch_kernel = UprobeOpts { 20 | func_name: "cudaLaunchKernel".to_string(), 21 | retprobe: false, 22 | ..Default::default() 23 | }; 24 | 25 | let cuda_launch_kernel_uprobe_link = self 26 | .skel 27 | .skel 28 | .progs 29 | .trace_cuda_launch_kernel 30 | .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_launch_kernel) 31 | .map_err(|_| GpuprobeError::AttachError)?; 32 | 33 | self.links.links.trace_cuda_launch_kernel = Some(cuda_launch_kernel_uprobe_link); 34 | Ok(()) 35 | } 36 | 37 | /// Consumes from the cudatrace event queue and updates cudatrace_state 38 | pub fn consume_cudatrace_events(&mut self) -> Result<(), GpuprobeError> { 39 | let key: [u8; 0] = []; // key size must be zero for BPF_MAP_TYPE_QUEUE 40 | // `lookup_and_delete` calls. 41 | while let Ok(opt) = self 42 | .skel 43 | .skel 44 | .maps 45 | .kernel_launch_events_queue 46 | .lookup_and_delete(&key) 47 | { 48 | let event_bytes = match opt { 49 | Some(b) => b, 50 | None => { 51 | // empty queue 52 | return Ok(()); 53 | } 54 | }; 55 | let event = match KernelLaunchEvent::from_bytes(&event_bytes) { 56 | Some(e) => e, 57 | None => { 58 | return Err(GpuprobeError::RuntimeError( 59 | "unable to construct MemleakEvent from bytes".to_string(), 60 | )); 61 | } 62 | }; 63 | self.glob_process_table.create_entry(event.pid)?; 64 | self.cudatrace_state.handle_event(event)?; 65 | } 66 | 67 | Ok(()) 68 | } 69 | } 70 | 71 | /// Represents a CUDA kernel function address as it is found in the .text 72 | /// section of the binary running on the host. 
We distinguish between a raw 73 | /// unresolved address, and a resolved symbol 74 | pub enum KernelAddress { 75 | Raw(u64), 76 | Symbol(String), 77 | } 78 | 79 | impl std::fmt::Display for KernelAddress { 80 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 81 | match self { 82 | KernelAddress::Raw(addr) => write!(f, "0x{:x}", addr), 83 | KernelAddress::Symbol(symbol) => write!(f, "{symbol}"), 84 | } 85 | } 86 | } 87 | 88 | pub struct CudatraceState { 89 | /// maps PID to a frequency histogram 90 | pub kernel_freq_hist: HashMap>, 91 | pub total_kernel_launches: u64, 92 | } 93 | 94 | impl CudatraceState { 95 | pub fn new() -> Self { 96 | return CudatraceState { 97 | kernel_freq_hist: HashMap::new(), 98 | total_kernel_launches: 0u64, 99 | }; 100 | } 101 | 102 | fn handle_event(&mut self, data: KernelLaunchEvent) -> Result<(), GpuprobeError> { 103 | self.total_kernel_launches += 1; 104 | if !self.kernel_freq_hist.contains_key(&data.pid) { 105 | self.kernel_freq_hist.insert(data.pid, BTreeMap::new()); 106 | } 107 | 108 | let b_tree_map = self.kernel_freq_hist.get_mut(&data.pid).unwrap(); 109 | 110 | if !b_tree_map.contains_key(&data.kern_offset) { 111 | b_tree_map.insert(data.kern_offset, 1u64); 112 | } else { 113 | *b_tree_map.get_mut(&data.kern_offset).unwrap() += 1; 114 | } 115 | Ok(()) 116 | } 117 | } 118 | 119 | struct KernelLaunchEvent { 120 | timestamp: u64, 121 | kern_offset: u64, 122 | pid: u32, 123 | } 124 | 125 | impl KernelLaunchEvent { 126 | /// Constructs a KernelLaunchEvent struct from a raw byte array and returns 127 | /// it, or None if the byte array isn't correctly sized. 128 | pub fn from_bytes(bytes: &[u8]) -> Option { 129 | if bytes.len() < std::mem::size_of::() { 130 | return None; 131 | } 132 | // This is safe if: 133 | // 1. The byte array contains valid data for this struct 134 | // 2. 
The byte array is at least as large as the struct 135 | unsafe { Some(std::ptr::read_unaligned(bytes.as_ptr() as *const Self)) } 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/gpuprobe/gpuprobe_memleak.rs: -------------------------------------------------------------------------------- 1 | use libbpf_rs::{MapCore, UprobeOpts}; 2 | 3 | use super::{ 4 | cuda_error::{CudaError, CudaErrorT}, 5 | Gpuprobe, GpuprobeError, 6 | }; 7 | use std::collections::{BTreeMap, HashMap, HashSet}; 8 | 9 | /// contains implementation for the memleak program 10 | impl Gpuprobe { 11 | /// attaches uprobes for the memleak program, or returns an error on 12 | /// failure 13 | pub fn attach_memleak_uprobes(&mut self) -> Result<(), GpuprobeError> { 14 | let opts_malloc = UprobeOpts { 15 | func_name: "cudaMalloc".to_string(), 16 | retprobe: false, 17 | ..Default::default() 18 | }; 19 | 20 | let opts_malloc_ret = UprobeOpts { 21 | func_name: "cudaMalloc".to_string(), 22 | retprobe: true, 23 | ..Default::default() 24 | }; 25 | 26 | let opts_free = UprobeOpts { 27 | func_name: "cudaFree".to_string(), 28 | retprobe: false, 29 | ..Default::default() 30 | }; 31 | 32 | let opts_free_ret = UprobeOpts { 33 | func_name: "cudaFree".to_string(), 34 | retprobe: true, 35 | ..Default::default() 36 | }; 37 | 38 | let cuda_malloc_uprobe_link = self 39 | .skel 40 | .skel 41 | .progs 42 | .memleak_cuda_malloc 43 | .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_malloc) 44 | .map_err(|_| GpuprobeError::AttachError)?; 45 | 46 | let cuda_malloc_uretprobe_link = self 47 | .skel 48 | .skel 49 | .progs 50 | .memleak_cuda_malloc_ret 51 | .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_malloc_ret) 52 | .map_err(|_| GpuprobeError::AttachError)?; 53 | 54 | let cuda_free_uprobe_link = self 55 | .skel 56 | .skel 57 | .progs 58 | .trace_cuda_free 59 | .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_free) 60 | 
.map_err(|_| GpuprobeError::AttachError)?; 61 | 62 | let cuda_free_uretprobe_link = self 63 | .skel 64 | .skel 65 | .progs 66 | .trace_cuda_free_ret 67 | .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_free_ret) 68 | .map_err(|_| GpuprobeError::AttachError)?; 69 | 70 | self.links.links.memleak_cuda_malloc = Some(cuda_malloc_uprobe_link); 71 | self.links.links.memleak_cuda_malloc_ret = Some(cuda_malloc_uretprobe_link); 72 | self.links.links.trace_cuda_free = Some(cuda_free_uprobe_link); 73 | self.links.links.trace_cuda_free_ret = Some(cuda_free_uretprobe_link); 74 | Ok(()) 75 | } 76 | 77 | /// Consumes from the memleak event queue and updates memleak_state 78 | pub fn consume_memleak_events(&mut self) -> Result<(), GpuprobeError> { 79 | let key: [u8; 0] = []; // key size must be zero for BPF_MAP_TYPE_QUEUE 80 | // `lookup_and_delete` calls. 81 | while let Ok(opt) = self 82 | .skel 83 | .skel 84 | .maps 85 | .memleak_events_queue 86 | .lookup_and_delete(&key) 87 | { 88 | let event_bytes = match opt { 89 | Some(b) => b, 90 | None => { 91 | return Ok(()); 92 | } 93 | }; 94 | let event = match MemleakEvent::from_bytes(&event_bytes) { 95 | Some(e) => e, 96 | None => { 97 | return Err(GpuprobeError::RuntimeError( 98 | "unable to construct MemleakEvent from bytes".to_string(), 99 | )); 100 | } 101 | }; 102 | 103 | match event.is_error() { 104 | false => { 105 | self.glob_process_table.create_entry(event.pid)?; 106 | self.memleak_state.handle_event(event)?; 107 | } 108 | true => { 109 | let err = CudaError { 110 | pid: event.pid, 111 | event: match event.event_type { 112 | 0 => super::cuda_error::EventType::CudaMalloc, 113 | _ => super::cuda_error::EventType::CudaFree, 114 | }, 115 | error: CudaErrorT::from_int(event.ret), 116 | }; 117 | self.err_state.insert(err)?; 118 | } 119 | } 120 | } 121 | Ok(()) 122 | } 123 | } 124 | 125 | pub struct MemleakState { 126 | pub memory_map: HashMap>, 127 | pub num_successful_mallocs: u64, 128 | pub num_failed_mallocs: 
u64, 129 | pub num_successful_frees: u64, 130 | pub num_failed_frees: u64, 131 | /// we use this to keep track of which processes are still alive, and 132 | /// efficiently clean up terminated processes 133 | active_pids: HashSet, 134 | } 135 | 136 | impl MemleakState { 137 | pub fn new() -> Self { 138 | return MemleakState { 139 | memory_map: HashMap::new(), 140 | num_successful_mallocs: 0u64, 141 | num_failed_mallocs: 0u64, 142 | num_successful_frees: 0u64, 143 | num_failed_frees: 0u64, 144 | active_pids: HashSet::new(), 145 | }; 146 | } 147 | 148 | /// Handles a MemleakEvent recorded in kernel-space and updates all state. 149 | /// This includes 150 | /// - memory map update 151 | /// - number of calls for events 152 | /// - number of failures 153 | fn handle_event(&mut self, data: MemleakEvent) -> Result<(), GpuprobeError> { 154 | self.active_pids.insert(data.pid.clone()); 155 | assert!( 156 | !data.is_error(), 157 | "handle_event(): should not handle failed Cuda calls" 158 | ); 159 | if data.event_type == MemleakEventType::CudaMalloc as i32 { 160 | if !self.memory_map.contains_key(&data.pid) { 161 | self.memory_map.insert(data.pid, BTreeMap::new()); 162 | } 163 | 164 | let memory_map = match self.memory_map.get_mut(&data.pid) { 165 | Some(mm) => mm, 166 | None => { 167 | todo!("should return error here"); 168 | } 169 | }; 170 | 171 | memory_map.insert( 172 | data.device_addr, 173 | CudaMemoryAlloc { 174 | size: data.size, 175 | offset: data.device_addr, 176 | }, 177 | ); 178 | } else if data.event_type == MemleakEventType::CudaFree as i32 { 179 | if !self.memory_map.contains_key(&data.pid) { 180 | // XXX: this is the user freeing memory that doesn't exist - 181 | // It might be best to track this sort of thing and export it 182 | // as a metric 183 | println!("\x1b[33mWARN: cudaFree() called on unallocated region\x1b[0m\n"); 184 | return Ok(()); 185 | } 186 | 187 | let memory_map = match self.memory_map.get_mut(&data.pid) { 188 | Some(mm) => mm, 189 | None => 
{ 190 | todo!("should return error here"); 191 | } 192 | }; 193 | 194 | // set the number of outsanding bytes to zero 195 | memory_map.insert( 196 | data.device_addr, 197 | CudaMemoryAlloc { 198 | size: 0u64, 199 | offset: data.device_addr, 200 | }, 201 | ); 202 | } else { 203 | return Err(GpuprobeError::RuntimeError( 204 | "invalid memleak event type".to_string(), 205 | )); 206 | } 207 | Ok(()) 208 | } 209 | 210 | /// Cleans up the memory maps for all terminated processes. This is a 211 | /// relatively expensive operation as it involves sending `kill(0)` signals 212 | /// to all of the processes being monitored as an aliveness check, so it 213 | /// should be used sparingly. 214 | pub fn cleanup_terminated_processes(&mut self) -> Result<(), GpuprobeError> { 215 | let pids: Vec = self.active_pids.clone().into_iter().collect(); 216 | for pid in pids { 217 | if MemleakState::is_process_dead(pid.clone())? { 218 | self.cleanup_single_terminated_process(pid.clone())?; 219 | } 220 | } 221 | Ok(()) 222 | } 223 | 224 | /// Cleans up the memory map for a single terminated process 225 | fn cleanup_single_terminated_process(&mut self, pid: u32) -> Result<(), GpuprobeError> { 226 | // we needn't clean up a processes's memory map more than once 227 | if !self.active_pids.contains(&pid) { 228 | return Ok(()); 229 | } 230 | 231 | let memory_map = match self.memory_map.get_mut(&pid) { 232 | Some(memory_map) => memory_map, 233 | None => { 234 | return Err(GpuprobeError::RuntimeError( 235 | "no memory map for provided pid".to_string(), 236 | )); 237 | } 238 | }; 239 | 240 | for (_, alloc) in memory_map { 241 | alloc.size = 0u64; 242 | } 243 | 244 | self.active_pids.remove(&pid); 245 | Ok(()) 246 | } 247 | 248 | /// Returns true iff the process has terminated 249 | fn is_process_dead(pid: u32) -> Result { 250 | #[cfg(target_family = "unix")] 251 | { 252 | use nix::sys::signal::kill; 253 | use nix::unistd::Pid; 254 | 255 | // We send a kill signal with value 0 to process identified 
by pid. 256 | // This functions as an aliveness probe. 257 | match kill(Pid::from_raw(pid as i32), None) { 258 | Ok(_) => Ok(false), 259 | Err(nix::errno::Errno::ESRCH) => Ok(true), 260 | Err(e) => Err(GpuprobeError::RuntimeError(e.to_string())), 261 | } 262 | } 263 | } 264 | } 265 | 266 | impl std::fmt::Display for MemleakState { 267 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 268 | writeln!(f, "per-process memory maps:")?; 269 | 270 | if self.memory_map.is_empty() { 271 | writeln!(f, "\tNo allocations on GPU")?; 272 | } else { 273 | for (pid, b_tree_map) in self.memory_map.iter() { 274 | writeln!(f, "process {}", pid)?; 275 | 276 | for (_, alloc) in b_tree_map.iter() { 277 | writeln!(f, "\t{alloc}")?; 278 | } 279 | } 280 | } 281 | writeln!(f) 282 | } 283 | } 284 | 285 | /// Maps one-to-one with `struct memleak_event` defined `/bpf/gpuprobe.bpf.c`. 286 | struct MemleakEvent { 287 | start: u64, 288 | end: u64, 289 | device_addr: u64, 290 | size: u64, 291 | pid: u32, 292 | ret: i32, 293 | event_type: i32, 294 | } 295 | 296 | enum MemleakEventType { 297 | CudaMalloc = 0, 298 | CudaFree = 1, 299 | } 300 | 301 | impl MemleakEvent { 302 | /// Constructs a MemleakEvent struct from a raw byte array and returns it, 303 | /// or None if the byte array isn't correctly sized. 304 | pub fn from_bytes(bytes: &[u8]) -> Option { 305 | if bytes.len() < std::mem::size_of::() { 306 | return None; 307 | } 308 | // This is safe if: 309 | // 1. The byte array contains valid data for this struct 310 | // 2. 
/// Metadata describing one CUDA memory allocation.
pub struct CudaMemoryAlloc {
    /// allocation size in bytes
    pub size: u64,
    /// address of the allocation
    pub offset: u64,
}

/// Renders an allocation as `0x<16 hex digits>: <size> Bytes`.
impl std::fmt::Display for CudaMemoryAlloc {
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { size, offset } = self;
        write!(out, "0x{offset:016x}: {size} Bytes")
    }
}
u32, 15 | pub call_type: String, 16 | pub return_code: u32, 17 | } 18 | 19 | #[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] 20 | pub struct MemleakLabelSet { 21 | pub pid: u32, 22 | pub offset: u64, 23 | } 24 | 25 | #[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] 26 | pub struct CudatraceLabelSet { 27 | pub pid: u32, 28 | pub kernel_offset: u64, 29 | pub kernel_symbol: String, 30 | } 31 | 32 | #[derive(Debug, Clone)] 33 | pub struct GpuprobeMetrics { 34 | opts: Opts, 35 | pub err_hist: Family, 36 | // memleak metrics 37 | pub memleaks: Family, 38 | // cuda trace 39 | pub kernel_launches: Family, 40 | } 41 | 42 | impl GpuprobeMetrics { 43 | pub fn new(opts: Opts) -> Result { 44 | Ok(GpuprobeMetrics { 45 | opts, 46 | err_hist: Family::default(), 47 | memleaks: Family::default(), 48 | kernel_launches: Family::default(), 49 | }) 50 | } 51 | 52 | pub fn register(&self, registry: &mut Registry) { 53 | if self.opts.memleak { 54 | registry.register( 55 | "cuda_memory_leaks", 56 | "Cuda memory leak statistics", 57 | self.memleaks.clone(), 58 | ); 59 | } 60 | if self.opts.cudatrace { 61 | registry.register( 62 | "cuda_kernel_launches", 63 | "Cuda kernel launch statistics", 64 | self.kernel_launches.clone(), 65 | ); 66 | } 67 | if self.opts.memleak || self.opts.cudatrace { 68 | registry.register( 69 | "cuda_error_histogram", 70 | "CUDA errors histogram keyed on process, error type and erroneous call", 71 | self.err_hist.clone(), 72 | ) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/gpuprobe/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cuda_error; 2 | pub mod gpuprobe_bandwidth_util; 3 | pub mod gpuprobe_cudatrace; 4 | pub mod gpuprobe_memleak; 5 | pub mod metrics; 6 | pub mod process_state; 7 | pub mod uprobe_data; 8 | 9 | use chrono::Local; 10 | use metrics::GpuprobeMetrics; 11 | use std::mem::MaybeUninit; 12 | 13 | use 
libbpf_rs::{ 14 | skel::{OpenSkel, SkelBuilder}, 15 | OpenObject, 16 | }; 17 | 18 | mod gpuprobe { 19 | include!(concat!( 20 | env!("CARGO_MANIFEST_DIR"), 21 | "/src/bpf/gpuprobe.skel.rs" 22 | )); 23 | } 24 | use gpuprobe::*; 25 | 26 | use self::{cuda_error::CudaErrorState, gpuprobe_cudatrace::CudatraceState}; 27 | use self::{gpuprobe_memleak::MemleakState, process_state::GlobalProcessTable}; 28 | 29 | pub struct SafeGpuprobeLinks { 30 | links: GpuprobeLinks, 31 | } 32 | 33 | pub struct SafeGpuprobeSkel { 34 | // E.G: For now we settle for this questionable behavior - we are 35 | // interacting with eBPF skeleton, managing the lifetime of a 36 | // kernel-attached eBPF program. At this stage I am not sure we can do 37 | // better than a static lifetime on this parameter. 38 | skel: GpuprobeSkel<'static>, 39 | } 40 | 41 | pub struct SafeGpuProbeObj { 42 | open_obj: Box>, 43 | } 44 | 45 | unsafe impl Send for SafeGpuprobeSkel {} 46 | unsafe impl Sync for SafeGpuprobeSkel {} 47 | 48 | unsafe impl Send for SafeGpuprobeLinks {} 49 | unsafe impl Sync for SafeGpuprobeLinks {} 50 | 51 | unsafe impl Send for SafeGpuProbeObj {} 52 | unsafe impl Sync for SafeGpuProbeObj {} 53 | 54 | /// Gpuuprobe wraps the eBPF program state, provides an interface for 55 | /// attaching relevant uprobes, and exporting their metrics. 56 | /// 57 | /// TODO: maybe consider using orobouros self-referential instead of the 58 | /// static lifetime 59 | pub struct Gpuprobe { 60 | obj: SafeGpuProbeObj, 61 | skel: SafeGpuprobeSkel, // references a static lifetime! 
See struct def 62 | links: SafeGpuprobeLinks, 63 | opts: Opts, 64 | pub metrics: GpuprobeMetrics, 65 | memleak_state: MemleakState, 66 | cudatrace_state: CudatraceState, 67 | /// maps pid to a symbol table - cached for quick symbolic resolution 68 | glob_process_table: GlobalProcessTable, 69 | err_state: CudaErrorState, 70 | } 71 | 72 | #[derive(Clone, Debug)] 73 | pub struct Opts { 74 | pub memleak: bool, 75 | pub cudatrace: bool, 76 | pub bandwidth_util: bool, 77 | pub libcudart_path: String, 78 | } 79 | 80 | const DEFAULT_LINKS: GpuprobeLinks = GpuprobeLinks { 81 | memleak_cuda_malloc: None, 82 | memleak_cuda_malloc_ret: None, 83 | trace_cuda_free: None, 84 | trace_cuda_free_ret: None, 85 | trace_cuda_launch_kernel: None, 86 | trace_cuda_memcpy: None, 87 | trace_cuda_memcpy_ret: None, 88 | }; 89 | 90 | impl Gpuprobe { 91 | /// returns a new Gpuprobe, or an initialization error on failure 92 | pub fn new(opts: Opts) -> Result { 93 | let skel_builder = GpuprobeSkelBuilder::default(); 94 | let mut open_obj = Box::new(MaybeUninit::uninit()); 95 | let open_obj_ptr = Box::as_mut(&mut open_obj) as *mut MaybeUninit; 96 | let open_skel = unsafe { 97 | skel_builder 98 | .open(&mut *open_obj_ptr) 99 | .map_err(|_| GpuprobeError::OpenError)? 
100 | }; 101 | let skel = open_skel.load().map_err(|_| GpuprobeError::LoadError)?; 102 | let metrics = GpuprobeMetrics::new(opts.clone())?; 103 | Ok(Self { 104 | obj: SafeGpuProbeObj { open_obj }, 105 | skel: SafeGpuprobeSkel { skel }, 106 | links: SafeGpuprobeLinks { 107 | links: DEFAULT_LINKS, 108 | }, 109 | opts, 110 | metrics, 111 | memleak_state: MemleakState::new(), 112 | cudatrace_state: CudatraceState::new(), 113 | glob_process_table: GlobalProcessTable::new(), 114 | err_state: CudaErrorState::new(), 115 | }) 116 | } 117 | 118 | /// Updates prometheus metrics registered by the GPUprobe instance 119 | pub fn export_open_metrics(&mut self) -> Result<(), GpuprobeError> { 120 | // updates memory leak stats 121 | if self.opts.memleak { 122 | // todo GC cycle for cleaning up memory maps?? 123 | self.memleak_state.cleanup_terminated_processes()?; 124 | self.consume_memleak_events()?; 125 | 126 | for (pid, b_tree_map) in self.memleak_state.memory_map.iter() { 127 | for (_, alloc) in b_tree_map { 128 | self.metrics 129 | .memleaks 130 | .get_or_create(&metrics::MemleakLabelSet { 131 | pid: pid.clone(), 132 | offset: alloc.offset, 133 | }) 134 | .set(alloc.size as i64); 135 | } 136 | } 137 | } 138 | 139 | if self.opts.cudatrace { 140 | self.consume_cudatrace_events()?; 141 | 142 | for (pid, b_tree_map) in self.cudatrace_state.kernel_freq_hist.iter() { 143 | for (offset, count) in b_tree_map.iter() { 144 | self.metrics 145 | .kernel_launches 146 | .get_or_create(&metrics::CudatraceLabelSet { 147 | pid: pid.clone(), 148 | kernel_offset: *offset, 149 | kernel_symbol: match self 150 | .glob_process_table 151 | .resolve_symbol_text_offset(*pid, *offset) 152 | { 153 | Some(symbol) => symbol, 154 | None => "unknown kernel".to_string(), 155 | }, 156 | }) 157 | .set(*count as i64); 158 | } 159 | } 160 | } 161 | 162 | // we use `opts.memleak || self.opts.cudatrace` as a proxy for an 163 | // implicit option for collecting errors. 
By placing this at the end, 164 | // we ensure that all relevant events have been handled this iteration 165 | if self.opts.memleak || self.opts.cudatrace { 166 | for (pid, hash_map) in self.err_state.error_histogram.iter() { 167 | for ((event_type, err), count) in hash_map.iter() { 168 | self.metrics 169 | .err_hist 170 | .get_or_create(&metrics::ErrorLabelSet { 171 | pid: *pid, 172 | call_type: event_type.to_string(), 173 | return_code: *err as u32, 174 | }) 175 | .set(*count as i64); 176 | } 177 | } 178 | } 179 | 180 | Ok(()) 181 | } 182 | 183 | /// Displays metrics collected by the GPUprobe instance 184 | /// Note: this causes metrics to be recollected from the eBPF Maps, which 185 | /// has non-zero interference with the eBPF uprobes. 186 | pub fn display_metrics(&mut self) -> Result<(), GpuprobeError> { 187 | let now = Local::now(); 188 | let formatted_datetime = now.format("%Y-%m-%d %H:%M:%S").to_string(); 189 | println!("========================"); 190 | println!("{}\n", formatted_datetime); 191 | 192 | if self.opts.memleak { 193 | self.memleak_state.cleanup_terminated_processes()?; 194 | self.consume_memleak_events()?; 195 | print!("{}", self.memleak_state); 196 | } 197 | if self.opts.cudatrace { 198 | self.consume_cudatrace_events()?; 199 | println!( 200 | "total kernel launches: {}", 201 | self.cudatrace_state.total_kernel_launches 202 | ); 203 | 204 | for (pid, b_tree_map) in self.cudatrace_state.kernel_freq_hist.iter() { 205 | println!("pid: {pid}"); 206 | for (addr, count) in b_tree_map.iter() { 207 | let resolved = match self 208 | .glob_process_table 209 | .resolve_symbol_text_offset(*pid, *addr) 210 | { 211 | None => "unknown kernel".to_string(), 212 | Some(str) => str, 213 | }; 214 | let formatted = format!("0x{:x} ({})", addr, resolved); 215 | println!("\t{:30} -> {}", formatted, count); 216 | } 217 | } 218 | } 219 | if self.opts.memleak || self.opts.cudatrace { 220 | println!("\n{}", self.err_state); 221 | } 222 | 223 | if 
/// Errors reported by `Gpuprobe`.
#[derive(Debug)]
pub enum GpuprobeError {
    /// the eBPF skeleton could not be opened
    OpenError,
    /// the eBPF skeleton could not be loaded
    LoadError,
    /// a probe could not be attached
    AttachError,
    /// any failure while collecting or processing events; carries a
    /// human-readable reason
    RuntimeError(String),
}

impl std::fmt::Display for GpuprobeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            GpuprobeError::OpenError => write!(f, "failed to open skeleton"),
            GpuprobeError::LoadError => write!(f, "failed to load skeleton"),
            GpuprobeError::AttachError => write!(f, "failed to attach skeleton"),
            GpuprobeError::RuntimeError(reason) => write!(f, "runtime error: {}", reason),
        }
    }
}

/// Lets `GpuprobeError` be used with `?` in functions returning
/// `Box<dyn std::error::Error>` and with error-handling helpers that expect
/// the standard trait.
impl std::error::Error for GpuprobeError {}
GlobalProcessTable { 8 | per_process_tables: HashMap>, 9 | } 10 | 11 | impl GlobalProcessTable { 12 | /// Returns a new GlobalProcessTable 13 | pub fn new() -> Self { 14 | return GlobalProcessTable { 15 | per_process_tables: HashMap::new(), 16 | }; 17 | } 18 | 19 | /// Creates an entry in the per-process symbols table if it doesn't yet 20 | /// exist. If an entry already exists, simply returns. 21 | /// Since the data inside of `/proc/{pid}/exe` is static, and reading the 22 | /// file is relatively expensive, we enforce that it is only done once 23 | /// per process. 24 | pub fn create_entry(&mut self, pid: u32) -> Result<(), GpuprobeError> { 25 | if self.per_process_tables.contains_key(&pid) { 26 | return Ok(()); 27 | } 28 | 29 | let new_entry = match ProcessState::new(pid) { 30 | Ok(entry) => Some(entry), 31 | Err(_) => None, 32 | }; 33 | self.per_process_tables.insert(pid, new_entry); 34 | Ok(()) 35 | } 36 | 37 | /// Removes the entry for pid in the per-process symbols table. 38 | pub fn remove_entry(&mut self, pid: u32) { 39 | self.per_process_tables.remove(&pid); 40 | } 41 | 42 | /// Resolves the symbol of an offset within the .text section of the 43 | /// binary executed by this process. 
Returns None if out of bounds, or 44 | /// doesn't point to a valid symbol 45 | pub fn resolve_symbol_text_offset(&self, pid: u32, virtual_offset: u64) -> Option { 46 | let proc_state = match self.per_process_tables.get(&pid) { 47 | Some(ps) => ps, 48 | None => { 49 | return None; 50 | } 51 | }; 52 | match proc_state { 53 | Some(proc_state) => proc_state.resolve_symbol_text_offset(virtual_offset), 54 | None => None, 55 | } 56 | } 57 | } 58 | 59 | impl std::fmt::Display for GlobalProcessTable { 60 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 61 | for (_, table) in self.per_process_tables.iter() { 62 | match table { 63 | Some(table) => write!(f, "{table}")?, 64 | None => write!(f, "NO PROCESS TABLE")?, 65 | } 66 | } 67 | Ok(()) 68 | } 69 | } 70 | 71 | /// ProcessState wraps the virtual base address (after address-space layout 72 | /// randomization) and elf-symbol table. We want to enable fast lookups to the 73 | /// symbol table of a process in order to resolve CUDA kernel addresses to 74 | /// a more human-readable name. 75 | /// Creating a new ProcessState likely incurs some overhead as it involves 76 | /// reading from the `/proc` pseudo-filesystem. Since this data is static, the 77 | /// caller should only be create at most one ProcessState per process. 
78 | pub struct ProcessState { 79 | pid: u32, 80 | base_addr: u64, 81 | elf_symbol_table: HashMap, 82 | } 83 | 84 | impl ProcessState { 85 | pub fn new(pid: u32) -> Result { 86 | let bin_path = match std::fs::read_link(format!("/proc/{}/exe", pid)) { 87 | Ok(p) => p, 88 | Err(e) => return Err(GpuprobeError::RuntimeError(format!("{e:?}"))), 89 | }; 90 | 91 | let maps = match get_process_maps(pid as i32) { 92 | Ok(m) => m, 93 | Err(e) => return Err(GpuprobeError::RuntimeError(format!("{e:?}"))), 94 | }; 95 | 96 | let base = match maps 97 | .into_iter() 98 | .find(|m| m.filename().map_or(false, |f| f == bin_path)) 99 | .map(|m| m.start()) 100 | { 101 | Some(base) => base, 102 | None => { 103 | return Err(GpuprobeError::RuntimeError( 104 | "unable to find binary base".to_string(), 105 | )); 106 | } 107 | } as u64; 108 | 109 | let buff = 110 | std::fs::read(bin_path).map_err(|e| GpuprobeError::RuntimeError(format!("{e:?}")))?; 111 | let obj = 112 | Object::parse(&buff).map_err(|e| GpuprobeError::RuntimeError(format!("{e:?}")))?; 113 | 114 | let symbols: HashMap = if let Object::Elf(elf) = obj { 115 | let syms = elf 116 | .syms 117 | .iter() 118 | .filter_map(|sym| { 119 | let name = elf.strtab.get_at(sym.st_name).unwrap_or("UNDEFINED"); 120 | Some((sym.st_value, name.to_string())) 121 | }) 122 | .collect(); 123 | syms 124 | } else { 125 | return Err(GpuprobeError::RuntimeError(format!( 126 | "no `/proc` entry for pid: {pid}" 127 | ))); 128 | }; 129 | 130 | Ok(ProcessState { 131 | pid, 132 | base_addr: base, 133 | elf_symbol_table: symbols, 134 | }) 135 | } 136 | 137 | /// Resolves the symbol of an offset within the .text section of the 138 | /// binary executed by this process. 
Returns None if out of bounds, or 139 | /// doesn't point to a valid symbol 140 | pub fn resolve_symbol_text_offset(&self, virtual_offset: u64) -> Option { 141 | self.elf_symbol_table 142 | .get(&(virtual_offset - self.base_addr)) 143 | .cloned() 144 | } 145 | } 146 | 147 | impl std::fmt::Display for ProcessState { 148 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 149 | writeln!(f, "process {}, with base {:x}", self.pid, self.base_addr)?; 150 | for (addr, symbol) in self.elf_symbol_table.iter() { 151 | writeln!(f, "\t{:016x} -> {}", addr, symbol)?; 152 | } 153 | Ok(()) 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/gpuprobe/uprobe_data.rs: -------------------------------------------------------------------------------- 1 | use super::gpuprobe_bandwidth_util::CudaMemcpy; 2 | 3 | /// defines the data that is collected in a cycle of the cudatrace program 4 | pub struct CudaTraceData { 5 | /// a Vec of `(addr, count)` where: 6 | /// - `addr` is the function pointer to the launched cuda kernel 7 | /// - `count` is the number of times that that kernel was launched 8 | pub kernel_frequencies_histogram: Vec<(u64, u64)>, 9 | } 10 | 11 | /// defines the data that is collected in a cycle of the cudatrace program 12 | pub struct BandwidthUtilData { 13 | /// a Vec of `CudaMemcpy` calls 14 | pub cuda_memcpys: Vec, 15 | } 16 | 17 | impl std::fmt::Display for CudaTraceData { 18 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 19 | let kernel_launches = &self.kernel_frequencies_histogram; 20 | let num_launches = kernel_launches 21 | .iter() 22 | .fold(0u64, |total, (_, count)| total + count); 23 | 24 | writeln!( 25 | f, 26 | "{} `cudaLaunchKernel` calls for {} kernels", 27 | num_launches, 28 | kernel_launches.len() 29 | )?; 30 | 31 | for (addr, count) in kernel_launches.iter() { 32 | writeln!(f, "\t0x{addr:x}: {count} launches")?; 33 | } 34 | Ok(()) 35 | } 36 | } 37 | 38 | 
impl std::fmt::Display for BandwidthUtilData { 39 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 40 | let calls = &self.cuda_memcpys; 41 | if calls.len() == 0 { 42 | return Ok(()); 43 | } 44 | 45 | writeln!(f, "Traced {} cudaMemcpy calls", calls.len())?; 46 | for c in calls.iter() { 47 | let bandwidth_util = c.compute_bandwidth_util().unwrap_or(0.0); 48 | let delta = (c.end_time - c.start_time) as f64 / 1e9; 49 | writeln!( 50 | f, 51 | "\t{} {:.5} bytes/sec for {:.5} secs", 52 | c.kind_to_str(), 53 | bandwidth_util, 54 | delta 55 | )?; 56 | } 57 | Ok(()) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | mod gpuprobe; 2 | 3 | use std::{sync::Arc, time::Duration}; 4 | 5 | use tokio::{select, sync::Mutex}; 6 | 7 | use clap::Parser; 8 | use gpuprobe::Gpuprobe; 9 | use prometheus_client::{encoding::text::encode, registry::Registry}; 10 | 11 | use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router}; 12 | 13 | #[derive(Parser)] 14 | #[command(author, version, about, long_about = None, arg_required_else_help = true)] 15 | struct Args { 16 | /// Attaches memleak program: detects leaking calls to cudaMalloc from the 17 | /// CUDA runtime API. 18 | #[arg(long, exclusive = false)] 19 | memleak: bool, 20 | 21 | /// Attaches the cudatrace program: maintains per-process histograms of 22 | /// cuda kernel launches and their frequencies 23 | #[arg(long, exclusive = false)] 24 | cudatrace: bool, 25 | 26 | /// Attaches the bandwidth util program: approximates bandwidth utilization 27 | /// of cudaMemcpy. 28 | #[arg(long, exclusive = false)] 29 | bandwidth_util: bool, 30 | 31 | /// Address for the Prometheus metrics endpoint. 32 | #[arg(long, default_value = "0.0.0.0:9000")] 33 | metrics_addr: String, 34 | 35 | /// Interval in seconds for displaying metrics to stdout. 
36 | #[arg(long, default_value_t = 5)] 37 | display_interval: u64, 38 | 39 | /// The path of the libcudart.so dynamic lib that is monitored. 40 | #[arg(long, default_value = "/usr/local/cuda/lib64/libcudart.so")] 41 | libcudart_path: String, 42 | } 43 | 44 | #[derive(Clone)] 45 | struct AppState { 46 | gpuprobe: Arc>, 47 | registry: Arc, 48 | } 49 | 50 | #[tokio::main] 51 | async fn main() -> Result<(), Box> { 52 | let args = Args::parse(); 53 | let opts = gpuprobe::Opts { 54 | memleak: args.memleak, 55 | cudatrace: args.cudatrace, 56 | bandwidth_util: args.bandwidth_util, 57 | libcudart_path: args.libcudart_path, 58 | }; 59 | 60 | let mut gpuprobe = gpuprobe::Gpuprobe::new(opts).unwrap(); 61 | gpuprobe.attach_uprobes().unwrap(); 62 | 63 | // Prometheus registry for exporting metrics 64 | let mut registry = Registry::default(); 65 | gpuprobe.metrics.register(&mut registry); 66 | 67 | let registry: Arc = Arc::new(registry); 68 | let gpuprobe = Mutex::new(gpuprobe); 69 | let gpuprobe = Arc::new(gpuprobe); 70 | 71 | // clones that are passed to the task that displays to stdout 72 | let gpuprobe_clone = Arc::clone(&gpuprobe); 73 | 74 | let app = Router::new() 75 | .route("/metrics", get(metrics_handler)) 76 | .with_state(AppState { gpuprobe, registry }); 77 | 78 | // a simple task that periodically displays metrics in their raw 79 | // OpenMetrics format to stdout 80 | let stdout_handle = tokio::spawn(async move { 81 | loop { 82 | let mut probe = gpuprobe_clone.lock().await; 83 | match probe.display_metrics() { 84 | Ok(_) => {} 85 | Err(e) => { 86 | println!("ERROR: {:?}", e); 87 | } 88 | } 89 | tokio::time::sleep(Duration::from_secs(args.display_interval)).await; 90 | } 91 | }); 92 | 93 | let listener = tokio::net::TcpListener::bind(&args.metrics_addr) 94 | .await 95 | .unwrap(); 96 | let server_handle = axum::serve(listener, app); 97 | 98 | select! 
{ 99 | _ = stdout_handle => { 100 | println!("Metrics printing task ended"); 101 | } 102 | _ = server_handle => { 103 | println!("Server task ended"); 104 | } 105 | } 106 | 107 | Ok(()) 108 | } 109 | 110 | /// Handler for the endpoint that is scraped by Prometheus 111 | async fn metrics_handler(State(state): State) -> impl IntoResponse { 112 | let _ = state.gpuprobe.lock().await.export_open_metrics(); 113 | let mut buffer = String::new(); 114 | match encode(&mut buffer, &state.registry) { 115 | Ok(()) => (StatusCode::OK, buffer), 116 | Err(_) => (StatusCode::INTERNAL_SERVER_ERROR, String::new()), 117 | } 118 | } 119 | --------------------------------------------------------------------------------