├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── ChangeLog ├── LICENSE ├── Makefile ├── Makefile.toml ├── README.md ├── fixtures ├── cache.mp ├── dir1 │ ├── .gitignore │ ├── duplicated │ ├── notignored │ ├── proto-http.la │ ├── script.zip │ ├── six.py │ └── tooshort ├── dir2 │ ├── ignored │ ├── lftp │ ├── lftp.offset │ └── link └── miniegg-1-py3.5.egg ├── src ├── cachemap.rs ├── errors.rs ├── main.rs ├── output.rs ├── registry.rs ├── scan.rs ├── statistics.rs ├── storepaths │ ├── cache.rs │ └── mod.rs ├── system.rs ├── tests │ └── mod.rs └── walk.rs └── userscan.1.rst /.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | **/*.rs.bk 3 | .*.sw? 4 | tags 5 | target/ 6 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "adler32" 5 | version = "1.2.0" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" 8 | 9 | [[package]] 10 | name = "aho-corasick" 11 | version = "0.7.15" 12 | source = "registry+https://github.com/rust-lang/crates.io-index" 13 | checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" 14 | dependencies = [ 15 | "memchr", 16 | ] 17 | 18 | [[package]] 19 | name = "ansi_term" 20 | version = "0.11.0" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 23 | dependencies = [ 24 | "winapi", 25 | ] 26 | 27 | [[package]] 28 | name = "anyhow" 29 | version = "1.0.35" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | checksum = "2c0df63cb2955042487fad3aefd2c6e3ae7389ac5dc1beb28921de0b69f779d4" 32 | 33 | [[package]] 34 | name = "atty" 35 | version = "0.2.14" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 38 | dependencies = [ 39 | "hermit-abi", 40 | "libc", 41 | "winapi", 42 | ] 43 | 44 | [[package]] 45 | name = "autocfg" 46 | version = "1.0.1" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" 49 | 50 | [[package]] 51 | name = "bitflags" 52 | version = "1.2.1" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 55 | 56 | [[package]] 57 | name = "bstr" 58 | version = "0.2.14" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | checksum = "473fc6b38233f9af7baa94fb5852dca389e3d95b8e21c8e3719301462c5d9faf" 61 | dependencies = [ 62 | "memchr", 63 | ] 64 | 65 | [[package]] 66 | name = 
"byteorder" 67 | version = "1.3.4" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" 70 | 71 | [[package]] 72 | name = "bytesize" 73 | version = "1.0.1" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "81a18687293a1546b67c246452202bbbf143d239cb43494cc163da14979082da" 76 | 77 | [[package]] 78 | name = "bzip2" 79 | version = "0.3.3" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b" 82 | dependencies = [ 83 | "bzip2-sys", 84 | "libc", 85 | ] 86 | 87 | [[package]] 88 | name = "bzip2-sys" 89 | version = "0.1.9+1.0.8" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "ad3b39a260062fca31f7b0b12f207e8f2590a67d32ec7d59c20484b07ea7285e" 92 | dependencies = [ 93 | "cc", 94 | "libc", 95 | "pkg-config", 96 | ] 97 | 98 | [[package]] 99 | name = "cc" 100 | version = "1.0.66" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" 103 | 104 | [[package]] 105 | name = "cfg-if" 106 | version = "0.1.10" 107 | source = "registry+https://github.com/rust-lang/crates.io-index" 108 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 109 | 110 | [[package]] 111 | name = "cfg-if" 112 | version = "1.0.0" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 115 | 116 | [[package]] 117 | name = "chrono" 118 | version = "0.4.19" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 121 | dependencies = [ 122 | "libc", 123 | "num-integer", 124 | "num-traits", 125 | "time", 126 | "winapi", 127 | ] 
128 | 129 | [[package]] 130 | name = "clap" 131 | version = "2.33.3" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" 134 | dependencies = [ 135 | "ansi_term", 136 | "atty", 137 | "bitflags", 138 | "strsim", 139 | "term_size", 140 | "textwrap", 141 | "unicode-width", 142 | "vec_map", 143 | ] 144 | 145 | [[package]] 146 | name = "colored" 147 | version = "1.9.3" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | checksum = "f4ffc801dacf156c5854b9df4f425a626539c3a6ef7893cc0c5084a23f0b6c59" 150 | dependencies = [ 151 | "atty", 152 | "lazy_static", 153 | "winapi", 154 | ] 155 | 156 | [[package]] 157 | name = "crc32fast" 158 | version = "1.2.1" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" 161 | dependencies = [ 162 | "cfg-if 1.0.0", 163 | ] 164 | 165 | [[package]] 166 | name = "crossbeam" 167 | version = "0.7.3" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "69323bff1fb41c635347b8ead484a5ca6c3f11914d784170b158d8449ab07f8e" 170 | dependencies = [ 171 | "cfg-if 0.1.10", 172 | "crossbeam-channel", 173 | "crossbeam-deque", 174 | "crossbeam-epoch", 175 | "crossbeam-queue", 176 | "crossbeam-utils 0.7.2", 177 | ] 178 | 179 | [[package]] 180 | name = "crossbeam-channel" 181 | version = "0.4.4" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87" 184 | dependencies = [ 185 | "crossbeam-utils 0.7.2", 186 | "maybe-uninit", 187 | ] 188 | 189 | [[package]] 190 | name = "crossbeam-deque" 191 | version = "0.7.3" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" 194 | dependencies = [ 195 | 
"crossbeam-epoch", 196 | "crossbeam-utils 0.7.2", 197 | "maybe-uninit", 198 | ] 199 | 200 | [[package]] 201 | name = "crossbeam-epoch" 202 | version = "0.8.2" 203 | source = "registry+https://github.com/rust-lang/crates.io-index" 204 | checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" 205 | dependencies = [ 206 | "autocfg", 207 | "cfg-if 0.1.10", 208 | "crossbeam-utils 0.7.2", 209 | "lazy_static", 210 | "maybe-uninit", 211 | "memoffset", 212 | "scopeguard", 213 | ] 214 | 215 | [[package]] 216 | name = "crossbeam-queue" 217 | version = "0.2.3" 218 | source = "registry+https://github.com/rust-lang/crates.io-index" 219 | checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570" 220 | dependencies = [ 221 | "cfg-if 0.1.10", 222 | "crossbeam-utils 0.7.2", 223 | "maybe-uninit", 224 | ] 225 | 226 | [[package]] 227 | name = "crossbeam-utils" 228 | version = "0.7.2" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" 231 | dependencies = [ 232 | "autocfg", 233 | "cfg-if 0.1.10", 234 | "lazy_static", 235 | ] 236 | 237 | [[package]] 238 | name = "crossbeam-utils" 239 | version = "0.8.1" 240 | source = "registry+https://github.com/rust-lang/crates.io-index" 241 | checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" 242 | dependencies = [ 243 | "autocfg", 244 | "cfg-if 1.0.0", 245 | "lazy_static", 246 | ] 247 | 248 | [[package]] 249 | name = "env_logger" 250 | version = "0.7.1" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" 253 | dependencies = [ 254 | "atty", 255 | "humantime", 256 | "log", 257 | "regex", 258 | "termcolor", 259 | ] 260 | 261 | [[package]] 262 | name = "fc-userscan" 263 | version = "0.4.8" 264 | dependencies = [ 265 | "anyhow", 266 | "atty", 267 | "bytesize", 268 | "chrono", 269 | 
"clap", 270 | "colored", 271 | "crossbeam", 272 | "env_logger", 273 | "fnv", 274 | "ignore", 275 | "lazy_static", 276 | "log", 277 | "memmap", 278 | "minilzo", 279 | "nix", 280 | "num_cpus", 281 | "probes", 282 | "regex", 283 | "rmp", 284 | "rmp-serde", 285 | "serde", 286 | "structopt", 287 | "tempfile", 288 | "thiserror", 289 | "twoway", 290 | "users", 291 | "zip", 292 | ] 293 | 294 | [[package]] 295 | name = "flate2" 296 | version = "1.0.14" 297 | source = "registry+https://github.com/rust-lang/crates.io-index" 298 | checksum = "2cfff41391129e0a856d6d822600b8d71179d46879e310417eb9c762eb178b42" 299 | dependencies = [ 300 | "cfg-if 0.1.10", 301 | "crc32fast", 302 | "libc", 303 | "miniz_oxide", 304 | ] 305 | 306 | [[package]] 307 | name = "fnv" 308 | version = "1.0.7" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 311 | 312 | [[package]] 313 | name = "getrandom" 314 | version = "0.1.15" 315 | source = "registry+https://github.com/rust-lang/crates.io-index" 316 | checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" 317 | dependencies = [ 318 | "cfg-if 0.1.10", 319 | "libc", 320 | "wasi 0.9.0+wasi-snapshot-preview1", 321 | ] 322 | 323 | [[package]] 324 | name = "globset" 325 | version = "0.4.6" 326 | source = "registry+https://github.com/rust-lang/crates.io-index" 327 | checksum = "c152169ef1e421390738366d2f796655fec62621dabbd0fd476f905934061e4a" 328 | dependencies = [ 329 | "aho-corasick", 330 | "bstr", 331 | "fnv", 332 | "log", 333 | "regex", 334 | ] 335 | 336 | [[package]] 337 | name = "heck" 338 | version = "0.3.1" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" 341 | dependencies = [ 342 | "unicode-segmentation", 343 | ] 344 | 345 | [[package]] 346 | name = "hermit-abi" 347 | version = "0.1.17" 348 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" 350 | dependencies = [ 351 | "libc", 352 | ] 353 | 354 | [[package]] 355 | name = "humantime" 356 | version = "1.3.0" 357 | source = "registry+https://github.com/rust-lang/crates.io-index" 358 | checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" 359 | dependencies = [ 360 | "quick-error", 361 | ] 362 | 363 | [[package]] 364 | name = "ignore" 365 | version = "0.4.17" 366 | source = "registry+https://github.com/rust-lang/crates.io-index" 367 | checksum = "b287fb45c60bb826a0dc68ff08742b9d88a2fea13d6e0c286b3172065aaf878c" 368 | dependencies = [ 369 | "crossbeam-utils 0.8.1", 370 | "globset", 371 | "lazy_static", 372 | "log", 373 | "memchr", 374 | "regex", 375 | "same-file", 376 | "thread_local", 377 | "walkdir", 378 | "winapi-util", 379 | ] 380 | 381 | [[package]] 382 | name = "lazy_static" 383 | version = "1.4.0" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 386 | 387 | [[package]] 388 | name = "libc" 389 | version = "0.2.81" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" 392 | 393 | [[package]] 394 | name = "log" 395 | version = "0.4.11" 396 | source = "registry+https://github.com/rust-lang/crates.io-index" 397 | checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" 398 | dependencies = [ 399 | "cfg-if 0.1.10", 400 | ] 401 | 402 | [[package]] 403 | name = "maybe-uninit" 404 | version = "2.0.0" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" 407 | 408 | [[package]] 409 | name = "memchr" 410 | version = "2.3.4" 411 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" 413 | 414 | [[package]] 415 | name = "memmap" 416 | version = "0.7.0" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" 419 | dependencies = [ 420 | "libc", 421 | "winapi", 422 | ] 423 | 424 | [[package]] 425 | name = "memoffset" 426 | version = "0.5.6" 427 | source = "registry+https://github.com/rust-lang/crates.io-index" 428 | checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa" 429 | dependencies = [ 430 | "autocfg", 431 | ] 432 | 433 | [[package]] 434 | name = "minilzo" 435 | version = "0.2.0" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "5949071f0a6f670d5082294c3a27d531de061e3aef61d60fa28d430cf39930f4" 438 | dependencies = [ 439 | "libc", 440 | "minilzo-sys", 441 | ] 442 | 443 | [[package]] 444 | name = "minilzo-sys" 445 | version = "0.1.0" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "ce123edaa554212f9aef7466ae6805c59ce71edd79ead0c2aa8e828e444de9e5" 448 | dependencies = [ 449 | "libc", 450 | ] 451 | 452 | [[package]] 453 | name = "miniz_oxide" 454 | version = "0.3.7" 455 | source = "registry+https://github.com/rust-lang/crates.io-index" 456 | checksum = "791daaae1ed6889560f8c4359194f56648355540573244a5448a83ba1ecc7435" 457 | dependencies = [ 458 | "adler32", 459 | ] 460 | 461 | [[package]] 462 | name = "nix" 463 | version = "0.19.1" 464 | source = "registry+https://github.com/rust-lang/crates.io-index" 465 | checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2" 466 | dependencies = [ 467 | "bitflags", 468 | "cc", 469 | "cfg-if 1.0.0", 470 | "libc", 471 | ] 472 | 473 | [[package]] 474 | name = "num-integer" 475 | version = "0.1.44" 476 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" 478 | dependencies = [ 479 | "autocfg", 480 | "num-traits", 481 | ] 482 | 483 | [[package]] 484 | name = "num-traits" 485 | version = "0.2.14" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" 488 | dependencies = [ 489 | "autocfg", 490 | ] 491 | 492 | [[package]] 493 | name = "num_cpus" 494 | version = "1.13.0" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" 497 | dependencies = [ 498 | "hermit-abi", 499 | "libc", 500 | ] 501 | 502 | [[package]] 503 | name = "pkg-config" 504 | version = "0.3.19" 505 | source = "registry+https://github.com/rust-lang/crates.io-index" 506 | checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" 507 | 508 | [[package]] 509 | name = "ppv-lite86" 510 | version = "0.2.10" 511 | source = "registry+https://github.com/rust-lang/crates.io-index" 512 | checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" 513 | 514 | [[package]] 515 | name = "probes" 516 | version = "0.4.1" 517 | source = "registry+https://github.com/rust-lang/crates.io-index" 518 | checksum = "4bb02a28631f195f482c19529ec82bec8e4ffa2d96159e67eb1ae9f5c5c902d8" 519 | dependencies = [ 520 | "libc", 521 | "time", 522 | ] 523 | 524 | [[package]] 525 | name = "proc-macro-error" 526 | version = "1.0.4" 527 | source = "registry+https://github.com/rust-lang/crates.io-index" 528 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 529 | dependencies = [ 530 | "proc-macro-error-attr", 531 | "proc-macro2", 532 | "quote", 533 | "syn", 534 | "version_check", 535 | ] 536 | 537 | [[package]] 538 | name = "proc-macro-error-attr" 539 | version = "1.0.4" 540 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 541 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 542 | dependencies = [ 543 | "proc-macro2", 544 | "quote", 545 | "version_check", 546 | ] 547 | 548 | [[package]] 549 | name = "proc-macro2" 550 | version = "1.0.24" 551 | source = "registry+https://github.com/rust-lang/crates.io-index" 552 | checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" 553 | dependencies = [ 554 | "unicode-xid", 555 | ] 556 | 557 | [[package]] 558 | name = "quick-error" 559 | version = "1.2.3" 560 | source = "registry+https://github.com/rust-lang/crates.io-index" 561 | checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" 562 | 563 | [[package]] 564 | name = "quote" 565 | version = "1.0.7" 566 | source = "registry+https://github.com/rust-lang/crates.io-index" 567 | checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" 568 | dependencies = [ 569 | "proc-macro2", 570 | ] 571 | 572 | [[package]] 573 | name = "rand" 574 | version = "0.7.3" 575 | source = "registry+https://github.com/rust-lang/crates.io-index" 576 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 577 | dependencies = [ 578 | "getrandom", 579 | "libc", 580 | "rand_chacha", 581 | "rand_core", 582 | "rand_hc", 583 | ] 584 | 585 | [[package]] 586 | name = "rand_chacha" 587 | version = "0.2.2" 588 | source = "registry+https://github.com/rust-lang/crates.io-index" 589 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" 590 | dependencies = [ 591 | "ppv-lite86", 592 | "rand_core", 593 | ] 594 | 595 | [[package]] 596 | name = "rand_core" 597 | version = "0.5.1" 598 | source = "registry+https://github.com/rust-lang/crates.io-index" 599 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 600 | dependencies = [ 601 | "getrandom", 602 | ] 603 | 604 | [[package]] 605 | name = "rand_hc" 606 | 
version = "0.2.0" 607 | source = "registry+https://github.com/rust-lang/crates.io-index" 608 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 609 | dependencies = [ 610 | "rand_core", 611 | ] 612 | 613 | [[package]] 614 | name = "redox_syscall" 615 | version = "0.1.57" 616 | source = "registry+https://github.com/rust-lang/crates.io-index" 617 | checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" 618 | 619 | [[package]] 620 | name = "regex" 621 | version = "1.4.2" 622 | source = "registry+https://github.com/rust-lang/crates.io-index" 623 | checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" 624 | dependencies = [ 625 | "aho-corasick", 626 | "memchr", 627 | "regex-syntax", 628 | "thread_local", 629 | ] 630 | 631 | [[package]] 632 | name = "regex-syntax" 633 | version = "0.6.21" 634 | source = "registry+https://github.com/rust-lang/crates.io-index" 635 | checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" 636 | 637 | [[package]] 638 | name = "remove_dir_all" 639 | version = "0.5.3" 640 | source = "registry+https://github.com/rust-lang/crates.io-index" 641 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 642 | dependencies = [ 643 | "winapi", 644 | ] 645 | 646 | [[package]] 647 | name = "rmp" 648 | version = "0.8.9" 649 | source = "registry+https://github.com/rust-lang/crates.io-index" 650 | checksum = "0f10b46df14cf1ee1ac7baa4d2fbc2c52c0622a4b82fa8740e37bc452ac0184f" 651 | dependencies = [ 652 | "byteorder", 653 | "num-traits", 654 | ] 655 | 656 | [[package]] 657 | name = "rmp-serde" 658 | version = "0.14.4" 659 | source = "registry+https://github.com/rust-lang/crates.io-index" 660 | checksum = "4ce7d70c926fe472aed493b902010bccc17fa9f7284145cb8772fd22fdb052d8" 661 | dependencies = [ 662 | "byteorder", 663 | "rmp", 664 | "serde", 665 | ] 666 | 667 | [[package]] 668 | name = "same-file" 669 | version = "1.0.6" 670 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 671 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 672 | dependencies = [ 673 | "winapi-util", 674 | ] 675 | 676 | [[package]] 677 | name = "scopeguard" 678 | version = "1.1.0" 679 | source = "registry+https://github.com/rust-lang/crates.io-index" 680 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 681 | 682 | [[package]] 683 | name = "serde" 684 | version = "1.0.118" 685 | source = "registry+https://github.com/rust-lang/crates.io-index" 686 | checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800" 687 | dependencies = [ 688 | "serde_derive", 689 | ] 690 | 691 | [[package]] 692 | name = "serde_derive" 693 | version = "1.0.118" 694 | source = "registry+https://github.com/rust-lang/crates.io-index" 695 | checksum = "c84d3526699cd55261af4b941e4e725444df67aa4f9e6a3564f18030d12672df" 696 | dependencies = [ 697 | "proc-macro2", 698 | "quote", 699 | "syn", 700 | ] 701 | 702 | [[package]] 703 | name = "strsim" 704 | version = "0.8.0" 705 | source = "registry+https://github.com/rust-lang/crates.io-index" 706 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 707 | 708 | [[package]] 709 | name = "structopt" 710 | version = "0.3.21" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | checksum = "5277acd7ee46e63e5168a80734c9f6ee81b1367a7d8772a2d765df2a3705d28c" 713 | dependencies = [ 714 | "clap", 715 | "lazy_static", 716 | "structopt-derive", 717 | ] 718 | 719 | [[package]] 720 | name = "structopt-derive" 721 | version = "0.4.14" 722 | source = "registry+https://github.com/rust-lang/crates.io-index" 723 | checksum = "5ba9cdfda491b814720b6b06e0cac513d922fc407582032e8706e9f137976f90" 724 | dependencies = [ 725 | "heck", 726 | "proc-macro-error", 727 | "proc-macro2", 728 | "quote", 729 | "syn", 730 | ] 731 | 732 | [[package]] 733 | name = "syn" 734 | version = "1.0.54" 735 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 736 | checksum = "9a2af957a63d6bd42255c359c93d9bfdb97076bd3b820897ce55ffbfbf107f44" 737 | dependencies = [ 738 | "proc-macro2", 739 | "quote", 740 | "unicode-xid", 741 | ] 742 | 743 | [[package]] 744 | name = "tempfile" 745 | version = "3.1.0" 746 | source = "registry+https://github.com/rust-lang/crates.io-index" 747 | checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" 748 | dependencies = [ 749 | "cfg-if 0.1.10", 750 | "libc", 751 | "rand", 752 | "redox_syscall", 753 | "remove_dir_all", 754 | "winapi", 755 | ] 756 | 757 | [[package]] 758 | name = "term_size" 759 | version = "0.3.2" 760 | source = "registry+https://github.com/rust-lang/crates.io-index" 761 | checksum = "1e4129646ca0ed8f45d09b929036bafad5377103edd06e50bf574b353d2b08d9" 762 | dependencies = [ 763 | "libc", 764 | "winapi", 765 | ] 766 | 767 | [[package]] 768 | name = "termcolor" 769 | version = "1.1.2" 770 | source = "registry+https://github.com/rust-lang/crates.io-index" 771 | checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" 772 | dependencies = [ 773 | "winapi-util", 774 | ] 775 | 776 | [[package]] 777 | name = "textwrap" 778 | version = "0.11.0" 779 | source = "registry+https://github.com/rust-lang/crates.io-index" 780 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 781 | dependencies = [ 782 | "term_size", 783 | "unicode-width", 784 | ] 785 | 786 | [[package]] 787 | name = "thiserror" 788 | version = "1.0.22" 789 | source = "registry+https://github.com/rust-lang/crates.io-index" 790 | checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e" 791 | dependencies = [ 792 | "thiserror-impl", 793 | ] 794 | 795 | [[package]] 796 | name = "thiserror-impl" 797 | version = "1.0.22" 798 | source = "registry+https://github.com/rust-lang/crates.io-index" 799 | checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56" 800 | 
dependencies = [ 801 | "proc-macro2", 802 | "quote", 803 | "syn", 804 | ] 805 | 806 | [[package]] 807 | name = "thread_local" 808 | version = "1.0.1" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" 811 | dependencies = [ 812 | "lazy_static", 813 | ] 814 | 815 | [[package]] 816 | name = "time" 817 | version = "0.1.44" 818 | source = "registry+https://github.com/rust-lang/crates.io-index" 819 | checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" 820 | dependencies = [ 821 | "libc", 822 | "wasi 0.10.0+wasi-snapshot-preview1", 823 | "winapi", 824 | ] 825 | 826 | [[package]] 827 | name = "twoway" 828 | version = "0.1.8" 829 | source = "registry+https://github.com/rust-lang/crates.io-index" 830 | checksum = "59b11b2b5241ba34be09c3cc85a36e56e48f9888862e19cedf23336d35316ed1" 831 | dependencies = [ 832 | "memchr", 833 | ] 834 | 835 | [[package]] 836 | name = "unicode-segmentation" 837 | version = "1.7.1" 838 | source = "registry+https://github.com/rust-lang/crates.io-index" 839 | checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" 840 | 841 | [[package]] 842 | name = "unicode-width" 843 | version = "0.1.8" 844 | source = "registry+https://github.com/rust-lang/crates.io-index" 845 | checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" 846 | 847 | [[package]] 848 | name = "unicode-xid" 849 | version = "0.2.1" 850 | source = "registry+https://github.com/rust-lang/crates.io-index" 851 | checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" 852 | 853 | [[package]] 854 | name = "users" 855 | version = "0.11.0" 856 | source = "registry+https://github.com/rust-lang/crates.io-index" 857 | checksum = "24cc0f6d6f267b73e5a2cadf007ba8f9bc39c6a6f9666f8cf25ea809a153b032" 858 | dependencies = [ 859 | "libc", 860 | "log", 861 | ] 862 | 863 | [[package]] 864 | name = "vec_map" 865 | 
version = "0.8.2" 866 | source = "registry+https://github.com/rust-lang/crates.io-index" 867 | checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 868 | 869 | [[package]] 870 | name = "version_check" 871 | version = "0.9.2" 872 | source = "registry+https://github.com/rust-lang/crates.io-index" 873 | checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" 874 | 875 | [[package]] 876 | name = "walkdir" 877 | version = "2.3.1" 878 | source = "registry+https://github.com/rust-lang/crates.io-index" 879 | checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" 880 | dependencies = [ 881 | "same-file", 882 | "winapi", 883 | "winapi-util", 884 | ] 885 | 886 | [[package]] 887 | name = "wasi" 888 | version = "0.9.0+wasi-snapshot-preview1" 889 | source = "registry+https://github.com/rust-lang/crates.io-index" 890 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 891 | 892 | [[package]] 893 | name = "wasi" 894 | version = "0.10.0+wasi-snapshot-preview1" 895 | source = "registry+https://github.com/rust-lang/crates.io-index" 896 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" 897 | 898 | [[package]] 899 | name = "winapi" 900 | version = "0.3.9" 901 | source = "registry+https://github.com/rust-lang/crates.io-index" 902 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 903 | dependencies = [ 904 | "winapi-i686-pc-windows-gnu", 905 | "winapi-x86_64-pc-windows-gnu", 906 | ] 907 | 908 | [[package]] 909 | name = "winapi-i686-pc-windows-gnu" 910 | version = "0.4.0" 911 | source = "registry+https://github.com/rust-lang/crates.io-index" 912 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 913 | 914 | [[package]] 915 | name = "winapi-util" 916 | version = "0.1.5" 917 | source = "registry+https://github.com/rust-lang/crates.io-index" 918 | checksum = 
"70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 919 | dependencies = [ 920 | "winapi", 921 | ] 922 | 923 | [[package]] 924 | name = "winapi-x86_64-pc-windows-gnu" 925 | version = "0.4.0" 926 | source = "registry+https://github.com/rust-lang/crates.io-index" 927 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 928 | 929 | [[package]] 930 | name = "zip" 931 | version = "0.5.9" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "cc2896475a242c41366941faa27264df2cb935185a92e059a004d0048feb2ac5" 934 | dependencies = [ 935 | "byteorder", 936 | "bzip2", 937 | "crc32fast", 938 | "flate2", 939 | "thiserror", 940 | "time", 941 | ] 942 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fc-userscan" 3 | version = "0.4.8" 4 | authors = ["Christian Kauhaus "] 5 | description = """\ 6 | Scans files for Nix store references and registers them with the Nix garbage collector. 
7 | """ 8 | repository = "https://github.com/flyingcircusio/userscan" 9 | readme = "README.md" 10 | categories = ["command-line-utilities", "filesystem", "os"] 11 | license = "BSD-3-Clause" 12 | edition = "2018" 13 | 14 | [dependencies] 15 | anyhow = "1" 16 | atty = "0.2" 17 | bytesize = "1.0" 18 | chrono = "0.4" 19 | clap = { version = "2.33", features = ["wrap_help"] } 20 | colored = "1.6" 21 | crossbeam = "0.7" 22 | env_logger = "0.7" 23 | fnv = "1.0" 24 | ignore = "0.4" 25 | lazy_static = "1.1" 26 | log = "0.4" 27 | memmap = "0.7" 28 | minilzo = "0.2" 29 | nix = "0.19" 30 | num_cpus = "1" 31 | probes = "0.4" 32 | regex = "1" 33 | rmp = "0.8" 34 | rmp-serde = "0.14" 35 | serde = { version = "1", features = ["derive"] } 36 | structopt = "0.3" 37 | thiserror = "1" 38 | twoway = "0.1" 39 | users = "0.11" 40 | zip = "0.5" 41 | 42 | [dev-dependencies] 43 | tempfile = "3.1" 44 | 45 | [profile.release] 46 | lto = "thin" 47 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2020-12-16 Christian Kauhaus 2 | 3 | * Adapt to new permissions in /nix/var/nix/gcroots/per-user in NixOS 4 | 20.09. 5 | * Silently ignore unknown file types like named pipes. 6 | * Release 0.4.8 7 | 8 | 2020-06-10 Christian Kauhaus 9 | 10 | * Fix broken compile 11 | * Release 0.4.5 12 | 13 | 2020-03-24 Christian Kauhaus 14 | 15 | * Behave correctly when invoked as setuid root binary 16 | 17 | 2019-03-28 Christian Kauhaus 18 | 19 | * Adapt GC_PREFIX to newer NixOS versions. 20 | * Release 0.4.3 21 | 22 | 2018-09-07 Christian Kauhaus 23 | 24 | * Print summary and soft error count in one line to ease log parsing. 25 | * Release 0.4.2 26 | 27 | 2018-06-26 Christian Kauhaus 28 | 29 | * Update dependencies 30 | 31 | 2018-05-22 Christian Kauhaus 32 | 33 | * Release 0.4.1 34 | 35 | 2018-01-05 Christian Kauhaus 36 | 37 | * Mention `--cache-limit` option in man page. 
38 | 39 | 2017-11-21 Christian Kauhaus 40 | 41 | * Release 0.4.0 42 | 43 | * Limit cache size via `-L` and abort after processing too many 44 | inodes. 45 | 46 | 2017-11-07 Christian Kauhaus 47 | 48 | * Cache: Reduce memory usage by cropping ctime_nsec values to u8. 49 | 50 | 2017-09-28 Christian Kauhaus 51 | 52 | * Release 0.3.2 53 | 54 | * Cache: Warn if trying to load a broken cache file, but don't abort. 55 | 56 | * Docs: Add man page (#8). 57 | 58 | 2017-09-18 Christian Kauhaus 59 | 60 | * Release 0.3.1 61 | 62 | * ZIP: Add warning while unpacking large ZIP archives. 63 | 64 | * Release 0.3.0 65 | 66 | * Change behaviour of --list: It doesn't register found references by 67 | default anymore. Specify --list --register to get both (#11). 68 | 69 | * Output: General improvements to make log files easier to read. 70 | 71 | * Output: Skip progress messages when in --list mode. 72 | 73 | * Cache: Don't change cache when in --list mode. 74 | 75 | 2017-08-30 kc@flyingcircus.io 76 | 77 | * Release 0.2.0 78 | 79 | * The cache is now stored as a compressed messagepack file (#11). 80 | 81 | * Decompress zipped files like Python eggs on the fly (#2). 82 | 83 | * Clarify help message. 84 | 85 | 2017-08-25 Christian Kauhaus 86 | 87 | * Release 0.1.4 88 | 89 | * Limit I/O load by sleeping a small amount between file accesses 90 | (stuttering) (#10). 91 | 92 | * Unpack compressed ZIP archives on the fly (#2). 93 | 94 | * Never cross device boundaries (#3). 95 | 96 | * Select/ignore file patterns via --include/--exclude command line 97 | options, --exclude-from=IGNOREFILE or automagically by placing 98 | patterns into ~/.userscan-ignore (#5). 99 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Flying Circus Internet Operations GmbH. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # rustup target add x86_64-unknown-linux-musl 2 | # nix-shell -p lzo pkgconfig clang docutils --run make 3 | 4 | TGT = target/x86_64-unknown-linux-musl/release 5 | VERSION = $(shell cargo pkgid | sed 's/.*://') 6 | PV = fc-userscan-$(VERSION) 7 | 8 | # create a tarball containing a static binary and the man page 9 | release: dist/$(PV)/fc-userscan.1 10 | cargo test 11 | cargo build --release --target x86_64-unknown-linux-musl 12 | mkdir -p dist/$(PV) 13 | install -m 0755 $(TGT)/fc-userscan dist/$(PV) 14 | cd dist && tar czf $(PV).tar.gz $(PV) 15 | 16 | dist/$(PV)/fc-userscan.1: userscan.1.rst 17 | mkdir -p dist/$(PV) 18 | sed 's/@version@/$(VERSION)/' userscan.1.rst > dist/userscan.1.rst 19 | rst2man.py dist/userscan.1.rst > dist/$(PV)/fc-userscan.1 20 | rm dist/userscan.1.rst 21 | 22 | clean: 23 | rm -rf dist 24 | 25 | cleanall: clean 26 | cargo clean 27 | 28 | .PHONY: release clean cleanall 29 | -------------------------------------------------------------------------------- /Makefile.toml: -------------------------------------------------------------------------------- 1 | # Simple `cargo make` file to conveniently build suid root executables for 2 | # local testing. Needs sudo. 3 | 4 | [tasks.suid] 5 | dependencies = ["build"] 6 | script = [ 7 | "sudo chown root: target/debug/fc-userscan", 8 | "sudo chmod 4755 target/debug/fc-userscan" 9 | ] 10 | 11 | # vim: set sw=2 sts=2 et: 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fc-userscan 2 | 3 | Scans directories containing manually compiled programs and registers them with 4 | the Nix garbage collector.
5 | 6 | ## Problem description 7 | 8 | One can install fancy libs using `nix-env` and compile programs against them. 9 | But, after a system update, the Nix garbage collector comes and pulls dynamic 10 | link and other runtime dependencies out from under our manually compiled files. 11 | 12 | This problem exists to some extent on all distros, but in NixOS it's amplified 13 | by the fact that even the smallest changes somewhere down the dependency chain will 14 | change checksums from which Nix store paths are constructed. 15 | 16 | ## Solution 17 | 18 | This tool allows scanning arbitrary directories and registering all Nix 19 | dependencies found as GC roots so that they won't be taken away by the garbage 20 | collector. Since plain string search is used, it works for both dynamic linkage 21 | and other references, like config file paths. 22 | 23 | ## Example 24 | 25 | Consider a Python virtualenv: 26 | 27 | ```ShellSession 28 | $ nix-env --install python3 29 | $ pyvenv myvenv 30 | ``` 31 | 32 | Now let's see if there are Nix store references present (hint: there are): 33 | 34 | ```ShellSession 35 | $ fc-userscan -l myvenv 36 | myvenv/bin/python3.5 37 | /nix/store/a5zbx856hyfgz2isz0j60i8w44i6av09-python3-3.5.2 38 | 39 | myvenv/pyvenv.cfg 40 | /nix/store/a5zbx856hyfgz2isz0j60i8w44i6av09-python3-3.5.2 41 | ``` 42 | 43 | `fc-userscan` scans and registers Nix store references found either as symlinks 44 | (like python3.5) or in files (pyvenv.cfg). The `-l` flag causes found references 45 | to be dumped to stdout.
At the same time, found references are registered with 46 | the Nix garbage collector: 47 | 48 | ```ShellSession 49 | $ ls -lR /nix/var/nix/gcroots/profiles/per-user/ckauhaus/home/ckauhaus/myvenv 50 | /nix/var/nix/gcroots/profiles/per-user/ckauhaus/home/ckauhaus/myvenv 51 | lrwxrwxrwx 1 ckauhaus users 57 Aug 11 13:29 a5zbx856hyfgz2isz0j60i8w44i6av09 -> /nix/store/a5zbx856hyfgz2isz0j60i8w44i6av09-python3-3.5.2 52 | drwxr-xr-x 2 ckauhaus users 46 Aug 11 13:29 bin 53 | 54 | /nix/var/nix/gcroots/profiles/per-user/ckauhaus/home/ckauhaus/myvenv/bin 55 | lrwxrwxrwx 1 ckauhaus users 57 Aug 11 13:29 a5zbx856hyfgz2isz0j60i8w44i6av09 -> /nix/store/a5zbx856hyfgz2isz0j60i8w44i6av09-python3-3.5.2 56 | ``` 57 | 58 | All GC root registrations for a given dir $DIR go into 59 | `/nix/var/nix/gcroots/profiles/per-user/$USER/$DIR` so this can easily be 60 | inspected by the administrator. Should a reference vanish at the original 61 | location, the registration will be cleaned up by the next run. 62 | 63 | ## Hacking 64 | 65 | `fc-userscan` compiles with Rust 1.39.0 or higher. 66 | 67 | To conveniently build setuid executables, install `cargo-make` and run `cargo 68 | make suid`. 69 | 70 | I'll happily accept pull requests at the [GitHub 71 | repository](https://github.com/flyingcircusio/userscan). 72 | Please write tests if feasible. 73 | 74 | ## Contact 75 | 76 | The primary author of `fc-userscan` is [Christian 77 | Kauhaus](mailto:kc@flyingcircus.io) or @ckauhaus on various online services. 78 | 79 | ## License 80 | 81 | The software is licensed under a 3-clause BSD license. 
82 | -------------------------------------------------------------------------------- /fixtures/cache.mp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyingcircusio/userscan/446f52cf14ab53450197166deaf20e90d64bc276/fixtures/cache.mp -------------------------------------------------------------------------------- /fixtures/dir1/.gitignore: -------------------------------------------------------------------------------- 1 | notignored 2 | -------------------------------------------------------------------------------- /fixtures/dir1/duplicated: -------------------------------------------------------------------------------- 1 | /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5 2 | /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5 3 | /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5 4 | -------------------------------------------------------------------------------- /fixtures/dir1/notignored: -------------------------------------------------------------------------------- 1 | File name listed in .gitignore, but .gitignore should not be obeyed 2 | 3 | /nix/store/00n9gkswhqdgbhgs7lnz2ckqxphavjr8-ChasingBottoms-1.3.1.2.drv 4 | /nix/store/00y6xgsdpjx3fyz4v7k5lwivi28yqd9f-initrd-fsinfo.drv 5 | -------------------------------------------------------------------------------- /fixtures/dir1/proto-http.la: -------------------------------------------------------------------------------- 1 | # proto-http.la - a libtool library file 2 | # Generated by libtool (GNU libtool) 2.4.2.418 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # The name that we can dlopen(3). 8 | dlname='' 9 | 10 | # Names of this library. 11 | library_names='' 12 | 13 | # The name of the static archive. 14 | old_library='proto-http.a' 15 | 16 | # Linker flags that cannot go in dependency_libs. 
17 | inherited_linker_flags='' 18 | 19 | # Libraries that this one depends upon. 20 | dependency_libs=' -L/usr/lib /home/ckauhaus/code/fc/userenv/lftp-4.7.1/src/liblftp-network.la -lgnutls /nix/store/9w3ci6fskmz3nw27fb68hybfa5v1r33f-libidn-1.33/lib/libidn.la -lz -lexpat -lutil -lncurses -ldl -lidn /nix/store/knvydciispmr4nr2rxg0iyyff3n1v4ax-gcc-6.2.0-lib/lib/libstdc++.la' 21 | 22 | # Names of additional weak libraries provided by this library 23 | weak_library_names='' 24 | 25 | # Version information for proto-http. 26 | current=0 27 | age=0 28 | revision=0 29 | 30 | # Is this an already installed library? 31 | installed=no 32 | 33 | # Should we warn about portability when linking against -modules? 34 | shouldnotlink=yes 35 | 36 | # Files to dlopen/dlpreopen 37 | dlopen='' 38 | dlpreopen='' 39 | 40 | # Directory that this library needs to be installed in: 41 | libdir='/tmp/lftp/lib/lftp/4.7.1' 42 | -------------------------------------------------------------------------------- /fixtures/dir1/script.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyingcircusio/userscan/446f52cf14ab53450197166deaf20e90d64bc276/fixtures/dir1/script.zip -------------------------------------------------------------------------------- /fixtures/dir1/six.py: -------------------------------------------------------------------------------- 1 | /nix/store/1b4i3gm31j1ipfbx1v9a3hhgmp2wvyyw-python2.7-six-1.9.0/lib/python2.7/site-packages/six.py -------------------------------------------------------------------------------- /fixtures/dir1/tooshort: -------------------------------------------------------------------------------- 1 | Somthing similar to a Nix store ref, but too short 2 | 3 | /nix/store/00n9gkswhqdgbhgs7lnz2ckqxphavjr 4 | -------------------------------------------------------------------------------- /fixtures/dir2/ignored: -------------------------------------------------------------------------------- 1 | 
Contains valid Nix store refs, but this file name is listed in .ignore 2 | 3 | /nix/store/00n9gkswhqdgbhgs7lnz2ckqxphavjr8-ChasingBottoms-1.3.1.2.drv 4 | /nix/store/00y6xgsdpjx3fyz4v7k5lwivi28yqd9f-initrd-fsinfo.drv 5 | /nix/store/00zncsd36zg37bymidhgqcarlfwf80gz-python3.4-requests-2.10.0.drv 6 | /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5 7 | /nix/store/019ix2amywwc14jk7fh462abl6y607ax-etc-file.drv 8 | /nix/store/01javy1gwy7ika2ygd0ww1m6xzxiyh5g-perl-HTML-Tagset-3.20.drv 9 | /nix/store/01llv9z7npk8wp7732r2h63rmr14d9xk-cryptonite-0.21.tar.gz.drv 10 | /nix/store/01pqbbz59b941bvwa1hf41p3v73plxy9-hamming-0.1.0.drv 11 | /nix/store/01qdsmv9gmlphxkwrisyxg9i6y9w1nrh-libX11-1.6.5.drv 12 | /nix/store/01rc2bq2s3dnvbrf0z8h23jxdw1brb10-nixos-install.drv 13 | -------------------------------------------------------------------------------- /fixtures/dir2/lftp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyingcircusio/userscan/446f52cf14ab53450197166deaf20e90d64bc276/fixtures/dir2/lftp -------------------------------------------------------------------------------- /fixtures/dir2/lftp.offset: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyingcircusio/userscan/446f52cf14ab53450197166deaf20e90d64bc276/fixtures/dir2/lftp.offset -------------------------------------------------------------------------------- /fixtures/dir2/link: -------------------------------------------------------------------------------- 1 | lkjlkjlkjlkj -------------------------------------------------------------------------------- /fixtures/miniegg-1-py3.5.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyingcircusio/userscan/446f52cf14ab53450197166deaf20e90d64bc276/fixtures/miniegg-1-py3.5.egg -------------------------------------------------------------------------------- 
/src/cachemap.rs: -------------------------------------------------------------------------------- 1 | //! Persistent HashMap used for caching previous scan results. 2 | use crate::output::p2s; 3 | 4 | use fnv::FnvHashMap; 5 | use nix::fcntl; 6 | use rmp_serde::{decode, encode}; 7 | use serde::{Deserialize, Serialize}; 8 | use std::fs; 9 | use std::io; 10 | use std::io::prelude::*; 11 | use std::ops::{Deref, DerefMut}; 12 | use std::os::unix::prelude::*; 13 | use std::path::{Path, PathBuf}; 14 | use thiserror::Error; 15 | 16 | #[derive(Debug, Error)] 17 | pub enum Error { 18 | #[error("I/O error")] 19 | IO(#[from] io::Error), 20 | #[error("LZO error")] 21 | LZO(#[from] minilzo::Error), 22 | #[error("MessagePack decode error")] 23 | RmpDE(#[from] rmp_serde::decode::Error), 24 | #[error("MessagePack encode error")] 25 | RmpEN(#[from] rmp_serde::encode::Error), 26 | #[error("Cannot acquire lock")] 27 | Lock(#[from] nix::Error), 28 | } 29 | 30 | type Result = std::result::Result; 31 | 32 | #[derive(Debug, PartialOrd, Clone, Serialize, Deserialize)] 33 | pub struct CacheLine { 34 | pub ctime: i64, 35 | pub ctime_nsec: u8, 36 | pub refs: Vec, 37 | #[serde(skip)] 38 | pub used: bool, 39 | } 40 | 41 | impl PartialEq for CacheLine { 42 | fn eq(&self, other: &CacheLine) -> bool { 43 | self.ctime == other.ctime && self.ctime_nsec == other.ctime_nsec && self.refs == other.refs 44 | } 45 | } 46 | 47 | impl CacheLine { 48 | pub fn new(ctime: i64, ctime_nsec: u8, refs: &[PathBuf]) -> Self { 49 | Self { 50 | ctime, 51 | ctime_nsec, 52 | refs: refs.to_vec(), 53 | used: true, 54 | } 55 | } 56 | } 57 | 58 | /// Creates or opens a file with an exclusive flock 59 | pub fn open_locked>(path: P) -> Result { 60 | let f = fs::OpenOptions::new() 61 | .read(true) 62 | .write(true) 63 | .create(true) 64 | .truncate(false) 65 | .open(&path)?; 66 | fcntl::flock(f.as_raw_fd(), fcntl::FlockArg::LockExclusiveNonblock)?; 67 | Ok(f) 68 | } 69 | 70 | /// Persistent cache data structure. 
Maps inode numbers to cache lines. 71 | #[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)] 72 | pub struct CacheMap { 73 | map: FnvHashMap, 74 | } 75 | 76 | impl CacheMap { 77 | #[allow(dead_code)] 78 | pub fn new() -> Self { 79 | Self::default() 80 | } 81 | 82 | /// Reads a cache file into a CacheMap structure 83 | pub fn load>(file: &mut fs::File, filename: P) -> Result { 84 | let mut compr = Vec::new(); 85 | file.seek(io::SeekFrom::Start(0))?; 86 | file.read_to_end(&mut compr)?; 87 | match minilzo::decompress(&compr, compr.len() * 10) 88 | .map_err(Error::from) 89 | .and_then(|data| decode::from_slice(&data).map_err(Error::from)) 90 | { 91 | Ok(cachemap) => Ok(cachemap), 92 | Err(err) => { 93 | warn!( 94 | "Problem while trying to load cache from {}: {} - continuing with empty cache", 95 | p2s(&filename), 96 | err 97 | ); 98 | Ok(Self::default()) 99 | } 100 | } 101 | } 102 | 103 | /// Writes a CacheMap structure into an open file 104 | pub fn save(&self, file: &mut fs::File) -> Result<()> { 105 | file.seek(io::SeekFrom::Start(0))?; 106 | file.set_len(0)?; 107 | Ok(file.write_all(&minilzo::compress(&encode::to_vec(self)?)?)?) 
108 | } 109 | } 110 | 111 | impl Deref for CacheMap { 112 | type Target = FnvHashMap; 113 | 114 | fn deref(&self) -> &FnvHashMap { 115 | &self.map 116 | } 117 | } 118 | 119 | impl DerefMut for CacheMap { 120 | fn deref_mut(&mut self) -> &mut FnvHashMap { 121 | &mut self.map 122 | } 123 | } 124 | 125 | #[cfg(test)] 126 | mod tests { 127 | use super::*; 128 | use crate::tests::FIXTURES; 129 | use tempfile::TempDir; 130 | 131 | #[test] 132 | fn cacheline_should_compare_regardless_of_used_flag() { 133 | assert_eq!( 134 | CacheLine { 135 | ctime: 1, 136 | ctime_nsec: 2, 137 | refs: vec![], 138 | used: true, 139 | }, 140 | CacheLine { 141 | ctime: 1, 142 | ctime_nsec: 2, 143 | refs: vec![], 144 | used: false, 145 | } 146 | ) 147 | } 148 | 149 | fn dummy_cachemap() -> CacheMap { 150 | let mut cm = FnvHashMap::default(); 151 | cm.insert(1, CacheLine::new(10, 11, &[PathBuf::from("/nix/ref1")][..])); 152 | cm.insert( 153 | 2, 154 | CacheLine::new( 155 | 20, 156 | 21, 157 | &[PathBuf::from("/nix/ref1"), PathBuf::from("/nix/ref2")][..], 158 | ), 159 | ); 160 | CacheMap { map: cm } 161 | } 162 | 163 | #[test] 164 | fn save_should_create_file() { 165 | let tempdir = TempDir::new().expect("failed to create tempdir"); 166 | let filename = tempdir.path().join("cache"); 167 | { 168 | let mut f = open_locked(&filename).unwrap(); 169 | assert!(dummy_cachemap().save(&mut f).is_ok()); 170 | } 171 | assert!(fs::metadata(&filename).unwrap().len() > 0); 172 | } 173 | 174 | #[test] 175 | fn load_should_decompress_cachefile() { 176 | let tempdir = TempDir::new().expect("failed to create tempdir"); 177 | let filename = tempdir.path().join("cache.ok"); 178 | fs::copy(FIXTURES.join("cache.mp"), &filename).unwrap(); 179 | let mut f = open_locked(&filename).unwrap(); 180 | let cm = CacheMap::load(&mut f, &filename).unwrap(); 181 | assert_eq!(12, cm.map.len()); 182 | } 183 | 184 | #[test] 185 | fn load_should_ignore_broken_cachefile() { 186 | let tempdir = TempDir::new().expect("failed to create 
tempdir"); 187 | let filename = tempdir.path().join("cache.truncated"); 188 | fs::copy(FIXTURES.join("cache.mp"), &filename).unwrap(); 189 | let mut f = open_locked(&filename).unwrap(); 190 | f.set_len(500).unwrap(); 191 | let cm = CacheMap::load(&mut f, &filename).expect("should ignore truncated cache file"); 192 | assert_eq!(cm.map.len(), 0); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use crate::cachemap; 2 | use std::io; 3 | use std::path::PathBuf; 4 | use thiserror::Error; 5 | use users::uid_t; 6 | use zip::result::ZipError; 7 | 8 | #[derive(Debug, Error)] 9 | pub enum UErr { 10 | #[error("internal: abort directory walk")] 11 | WalkAbort, 12 | #[error("DirEntry for '{0}' does not contain metadata; cannot process")] 13 | DentNoMetadata(PathBuf), 14 | #[error("Cache limit {0} exceeded")] 15 | CacheFull(usize), 16 | #[error("File has an unknown file type - don't know how to handle that")] 17 | FiletypeUnknown, 18 | #[error("Failed to locate UID {0} in passwd database")] 19 | UnknownUser(uid_t), 20 | #[error("Failed to unpack ZIP archive '{0}': {1}")] 21 | ZIP(PathBuf, #[source] ZipError), 22 | #[error("Cannot determine current user. 
Who am I?")] 23 | WhoAmI, 24 | #[error("startdir must be an absolute path")] 25 | Relative, 26 | #[error("Directory traversal error")] 27 | Traverse(#[from] ignore::Error), 28 | #[error("Failed to create '{0}'")] 29 | Create(PathBuf, #[source] io::Error), 30 | #[error("Failed to remove '{0}'")] 31 | Remove(PathBuf, #[source] io::Error), 32 | #[error("Failed to read link '{0}'")] 33 | ReadLink(PathBuf, #[source] io::Error), 34 | #[error("Failed to determine current directory")] 35 | CWD(#[source] io::Error), 36 | #[error("Failed to load cache from '{0}'")] 37 | LoadCache(PathBuf, #[source] cachemap::Error), 38 | #[error("Failed to save cache to '{0}'")] 39 | SaveCache(PathBuf, #[source] cachemap::Error), 40 | #[error("I/O error")] 41 | IO(#[from] io::Error), 42 | #[error("Operating system error")] 43 | Nix(#[from] nix::Error), 44 | } 45 | 46 | pub type Result = ::std::result::Result; 47 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![recursion_limit = "256"] 2 | 3 | #[macro_use] 4 | extern crate clap; 5 | #[macro_use] 6 | extern crate log; 7 | 8 | mod cachemap; 9 | mod errors; 10 | mod output; 11 | mod registry; 12 | mod scan; 13 | mod statistics; 14 | mod storepaths; 15 | mod system; 16 | #[cfg(test)] 17 | mod tests; 18 | mod walk; 19 | 20 | use anyhow::{Context, Result}; 21 | use bytesize::ByteSize; 22 | use errors::UErr; 23 | use ignore::overrides::OverrideBuilder; 24 | use ignore::WalkBuilder; 25 | use lazy_static::lazy_static; 26 | use nix::unistd::{geteuid, getuid}; 27 | use output::{p2s, Output}; 28 | use registry::{GCRoots, NullGCRoots, Register}; 29 | use statistics::Statistics; 30 | use std::fs; 31 | use std::ops::DerefMut; 32 | use std::path::PathBuf; 33 | use storepaths::Cache; 34 | use structopt::StructOpt; 35 | use users::os::unix::UserExt; 36 | 37 | static STORE: &str = "/nix/store/"; 38 | static GC_PREFIX: &str = 
"/nix/var/nix/gcroots/per-user"; 39 | static DOTEXCLUDE: &str = ".userscan-ignore"; 40 | 41 | fn add_dotexclude(mut wb: WalkBuilder, u: &U) -> Result { 42 | if let Some(me) = u.get_user_by_uid(u.get_effective_uid()) { 43 | let candidate = me.home_dir().join(DOTEXCLUDE); 44 | if candidate.exists() { 45 | if let Some(err) = wb.add_ignore(&candidate) { 46 | warn!("Invlid entry in ignore file {}: {}", p2s(candidate), err); 47 | } 48 | } 49 | Ok(wb) 50 | } else { 51 | Err(UErr::UnknownUser(u.get_effective_uid()).into()) 52 | } 53 | } 54 | 55 | #[derive(Debug, Clone, Default)] 56 | pub struct App { 57 | opt: Opt, 58 | output: Output, 59 | overrides: Vec, 60 | register: bool, 61 | exectx: system::ExecutionContext, 62 | } 63 | 64 | impl App { 65 | /// WalkBuilder configured according to the cmdline arguments 66 | fn walker(&self) -> Result { 67 | let startdir = self.startdir()?; 68 | let mut ov = OverrideBuilder::new(&startdir); 69 | for o in &self.overrides { 70 | let _ = ov.add(o)?; 71 | } 72 | 73 | let mut wb = WalkBuilder::new(startdir); 74 | wb.parents(false) 75 | .git_global(false) 76 | .git_ignore(false) 77 | .ignore(false) 78 | .overrides(ov.build()?) 
79 | .hidden(false); 80 | for p in &self.opt.excludefrom { 81 | if let Some(err) = wb.add_ignore(p) { 82 | warn!("Problem with ignore file {}: {}", p2s(p), err); 83 | } 84 | } 85 | add_dotexclude(wb, &users::cache::UsersCache::new()) 86 | } 87 | 88 | fn scanner(&self) -> Result { 89 | let mut ob = OverrideBuilder::new(&self.opt.startdir); 90 | for glob in &self.opt.unzip { 91 | ob.add(glob)?; 92 | } 93 | let baseline = probes::load::read()?.fifteen; 94 | let max_load = match self.opt.load_increase { 95 | inc if inc <= 0.0 => 0.0, 96 | inc => baseline + inc * num_cpus::get() as f32, 97 | }; 98 | debug!("Baseline load: {}, limit: {}", baseline, max_load); 99 | Ok(scan::Scanner::new( 100 | self.opt.quickcheck, 101 | ob.build()?, 102 | max_load, 103 | )) 104 | } 105 | 106 | fn gcroots(&self) -> Result> { 107 | if self.opt.register { 108 | Ok(Box::new(GCRoots::new( 109 | GC_PREFIX, 110 | self.startdir()?, 111 | &self.output, 112 | )?)) 113 | } else { 114 | Ok(Box::new(NullGCRoots::new(&self.output))) 115 | } 116 | } 117 | 118 | fn cache(&self) -> Result { 119 | let cache = Cache::new(self.opt.cache_limit); 120 | if let Some(ref f) = self.opt.cache { 121 | cache.open(f, &self.exectx) 122 | } else { 123 | Ok(cache) 124 | } 125 | } 126 | 127 | fn statistics(&self) -> Statistics { 128 | Statistics::new(self.opt.statistics, self.output.list) 129 | } 130 | 131 | /// Normalized directory where scanning starts. 132 | /// 133 | /// Don't use this for user messages, they should print out `self.opt.startdir` instead. 134 | fn startdir(&self) -> Result { 135 | self.opt 136 | .startdir 137 | .canonicalize() 138 | .with_context(|| format!("start dir {} is not accessible", p2s(&self.opt.startdir))) 139 | } 140 | 141 | /// The Metadata entry of the start directory. 142 | /// 143 | /// Needed for crossdev detection. 
144 | fn start_meta(&self) -> Result { 145 | fs::metadata(self.startdir()?).map_err(|e| e.into()) 146 | } 147 | 148 | /// Main entry point 149 | pub fn run(&self) -> Result { 150 | self.output.log_init(); 151 | debug!("uid: {}, euid: {}", getuid(), geteuid()); 152 | match walk::spawn_threads(self, self.gcroots()?.deref_mut())?.softerrors() { 153 | 0 => Ok(0), 154 | _ => Ok(1), 155 | } 156 | } 157 | } 158 | 159 | impl From for App { 160 | fn from(opt: Opt) -> Self { 161 | let output = Output::from(&opt); 162 | let mut overrides = vec![]; 163 | overrides.extend(opt.exclude.iter().map(|e| format!("!{}", e))); 164 | overrides.extend(opt.include.iter().map(|i| i.to_owned())); 165 | let register = opt.register || !opt.list; 166 | App { 167 | opt, 168 | output, 169 | overrides, 170 | register, 171 | ..Self::default() 172 | } 173 | } 174 | } 175 | 176 | lazy_static! { 177 | static ref AFTER_HELP: String = format!( 178 | "Ignore globs are always loaded from ~/{} if it exists. For the format of all ignore \ 179 | files refer to the gitignore(5) man page.", 180 | DOTEXCLUDE 181 | ); 182 | } 183 | 184 | fn parse_kb(arg: &str) -> Result { 185 | let n = arg.parse()?; 186 | Ok(ByteSize::kib(n)) 187 | } 188 | 189 | #[derive(StructOpt, Debug, Clone, Default)] 190 | #[structopt( 191 | author = "© Flying Circus Internet Operations GmbH and contributors.", 192 | after_help = AFTER_HELP.as_str() 193 | )] 194 | struct Opt { 195 | /// Starts scan in DIRECTORY 196 | #[structopt(value_name = "DIRECTORY", parse(from_os_str))] 197 | startdir: PathBuf, 198 | /// Only prints Nix store references while scanning (doesn't register) 199 | /// 200 | /// GC roots are not registered when this option is active. Specify -r/--register in addition 201 | /// to get both listing and registration. 
202 | #[structopt(short, long, display_order(1))] 203 | list: bool, 204 | /// Registers references (enabled by default if --list if not given) 205 | #[structopt(short, long, display_order(2))] 206 | register: bool, 207 | /// Keeps results between runs in FILE 208 | /// 209 | /// Caches scan results in FILE to avoid re-scanning unchanged files. The cache is kept as a 210 | /// compressed messagepack file. 211 | #[structopt(short, long, value_name = "FILE", parse(from_os_str))] 212 | cache: Option, 213 | /// Limits cache to N entries 214 | /// 215 | /// Aborts program execution when trying to store more than N entries in the cache. This helps 216 | /// to limit memory consumption. 217 | #[structopt(short = "L", long, value_name = "N")] 218 | cache_limit: Option, 219 | /// Prints each file with references on a single line 220 | #[structopt(short = "1", long)] 221 | oneline: bool, 222 | /// Funky colorful output 223 | /// 224 | /// Enables colored output. If set to "auto", color is on if run in a terminal. 225 | #[structopt(short = "C", long, value_name = "WHEN", default_value = "auto", 226 | possible_values(&["always", "never", "auto"]) 227 | )] 228 | color: String, 229 | /// Prints detailed statistics like scans per file type 230 | #[structopt(short = "S", long = "stats", alias = "statistics")] 231 | statistics: bool, 232 | /// Displays additional output like scan times 233 | #[structopt(short, long)] 234 | verbose: bool, 235 | /// Shows every file opened and lots of other stuff (implies --verbose) 236 | #[structopt(short, long, display_order(100))] 237 | debug: bool, 238 | /// Scans only the first SIZE kB of a file 239 | /// 240 | /// Gives up if no Nix store references are found in the first SIZE kilobytes of a file. 241 | /// Assumes that at least one Nix store reference is located near the beginning. Speeds up 242 | /// scanning large files considerably. 
243 | #[structopt(short, long, default_value = "512", value_name = "SIZE", 244 | parse(try_from_str = parse_kb))] 245 | quickcheck: ByteSize, 246 | /// Skips files matching GLOB 247 | /// 248 | /// Skips files matching GLOB. May be given multiple times. 249 | #[structopt(short, long, value_name = "GLOB", number_of_values(1))] 250 | exclude: Vec, 251 | /// Scans only files matching GLOB 252 | /// 253 | /// Scans only files matching GLOB. may be given multiple times. note including individual 254 | /// files shows no effect if their containing directory is matched by an exclude glob. 255 | #[structopt(short, long, value_name = "GLOB", number_of_values(1))] 256 | include: Vec, 257 | /// Loads exclude globs from FILE 258 | /// 259 | /// Loads exclude globs from FILE, which is expected to be in .gitignore format. May be given 260 | /// multiple times. 261 | #[structopt( 262 | short = "E", 263 | long, 264 | value_name = "FILE", 265 | number_of_values(1), 266 | parse(from_os_str) 267 | )] 268 | excludefrom: Vec, 269 | /// Scans inside ZIP archives for files matching GLOB 270 | /// 271 | /// Unpacks all files with matching GLOB as ZIP archives and scans inside. Accepts a 272 | /// comma-separated list of glob patterns [example: *.zip,*.egg]. 273 | #[structopt(short, long, use_delimiter(true))] 274 | unzip: Vec, 275 | /// Pauses scanning if the current load1 goes over load15+L 276 | /// 277 | /// The baseline is determined at program startup. If there are multiple CPUs present, 278 | /// the increase is granted per CPU. Use '0.0' to disable. 
#[structopt(
        short = "p",
        long = "pause-load",
        default_value = "0.5",
        value_name = "L"
    )]
    load_increase: f32,
}

/// Program entry point: builds the `App` from the command line options and runs it.
///
/// Exits with the code returned by `App::run`; on error the error is logged and the
/// process exits with status 2.
fn main() {
    let app = App::from(Opt::from_args());
    match app.run() {
        Err(ref err) => {
            error!("{:#?}", err);
            std::process::exit(2)
        }
        Ok(exitcode) => std::process::exit(exitcode),
    }
}

#[cfg(test)]
pub mod test {
    use super::*;

    /// Builds an `App` from the given options plus a fixed program name and start dir.
    fn app(opts: &[&str]) -> App {
        let mut argv = vec!["userscan"];
        argv.extend_from_slice(opts);
        argv.push("dir");
        App::from(Opt::from_iter(&argv))
    }

    #[test]
    fn overrides_should_be_collected_and_prefixed() {
        let a = app(&["-i", "glob1", "-e", "glob2", "-i", "glob3"]);
        assert_eq!(vec!["!glob2", "glob1", "glob3"], a.overrides);
    }

    #[test]
    fn list_should_disable_register() {
        let a = app(&[]);
        assert!(!a.output.list);
        assert!(a.register);

        let a = app(&["--list"]);
        assert!(a.output.list);
        assert!(!a.register);

        let a = app(&["--list", "--register"]);
        assert!(a.output.list);
        assert!(a.register);
    }
}

--------------------------------------------------------------------------------
/src/output.rs:
--------------------------------------------------------------------------------
use crate::storepaths::StorePaths;
use crate::{Opt, STORE};

use atty::{self, Stream};
use colored::{self, ColoredString, Colorize};
use env_logger::Builder;
use log::{Level, LevelFilter};
use std::io;
use std::io::prelude::*;
use std::path::Path;
use std::time::Duration;

/// Output settings: log level, list format, coloring and whether to list store paths.
#[derive(Debug, Clone, PartialEq)]
pub struct Output {
    pub level: LevelFilter,
    pub oneline: bool,
    pub color: bool,
    pub list: bool,
}

impl Default for Output {
    /// Logging off, multi-line format, no color, no listing.
    fn default() -> Self {
        Output {
            level: LevelFilter::Off,
            oneline: false,
            color: false,
            list: false,
        }
    }
}

impl Output {
    /// Creates output settings from command line flags.
    ///
    /// `debug` takes precedence over `verbose`; with neither flag the level is `Warn`.
    /// `color` is `"always"`, `"never"` or anything else, in which case color is enabled
    /// only if both stdout and stderr are terminals.
    pub fn new(verbose: bool, debug: bool, oneline: bool, color: &str, list: bool) -> Output {
        Output {
            level: match (verbose, debug) {
                (_, true) => LevelFilter::Debug,
                (true, _) => LevelFilter::Info,
                _ => LevelFilter::Warn,
            },
            color: match color {
                "always" => true,
                "never" => false,
                _ => atty::is(Stream::Stdout) && atty::is(Stream::Stderr),
            },
            oneline,
            list,
        }
    }

    /// Sets up logging with colored output if requested.
    ///
    /// Errors and warnings are prefixed with their (colored) level name; info messages
    /// are printed as-is and all remaining levels are printed in blue.
    pub fn log_init(&self) {
        colored::control::set_override(self.color);
        Builder::new()
            .format(|buf, r| match r.level() {
                Level::Error => {
                    writeln!(buf, "{}: {}", r.level().to_string().red().bold(), r.args())
                }
                Level::Warn => writeln!(buf, "{}: {}", r.level().to_string().yellow(), r.args()),
                Level::Info => writeln!(buf, "{}", r.args()),
                _ => writeln!(buf, "{}", r.args().to_string().blue()),
            })
            .filter(None, self.level)
            .init();
    }

    /// Outputs the name of a scanned file together with the store paths found inside.
    ///
    /// Depending on the desired output format the store paths are either space-separated
    /// on the same line as the file name (`oneline`, file name followed by `:`) or written
    /// one per line, followed by an empty line.
    pub fn write_store_paths(&self, w: &mut dyn Write, sp: &StorePaths) -> io::Result<()> {
        let filename = format!(
            "{}{}",
            sp.path().display(),
            if self.oneline { ":" } else { "" }
        );
        write!(w, "{}", filename.purple().bold())?;
        let sep = if self.oneline { " " } else { "\n" };
        for r in sp.iter_refs() {
            write!(w, "{}{}{}", sep, STORE, r.display())?
        }
        writeln!(w, "{}", if self.oneline { "" } else { "\n" })
    }

    /// Prints the store paths of `sp` to stdout if listing is enabled.
    ///
    /// Write errors are deliberately ignored (best effort).
    #[inline]
    pub fn print_store_paths(&self, sp: &StorePaths) {
        if !self.list {
            return;
        }
        let w = io::stdout();
        let mut w = io::BufWriter::new(w.lock());
        self.write_store_paths(&mut w, sp).ok();
    }
}

impl<'a> From<&'a Opt> for Output {
    fn from(opt: &'a Opt) -> Self {
        Output::new(opt.verbose, opt.debug, opt.oneline, &opt.color, opt.list)
    }
}

/// Path to String with coloring
pub fn p2s<P: AsRef<Path>>(path: P) -> ColoredString {
    path.as_ref().display().to_string().green()
}

/// Duration to seconds
///
/// Converts a `time::Duration` value into a floating-point seconds value.
pub fn d2s(d: Duration) -> f32 {
    d.as_secs() as f32 + (d.subsec_nanos() as f32) / 1e9
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn color_default_argument() {
        let o = Output::new(false, false, false, "never", false);
        assert!(!o.color);

        let o = Output::new(false, false, false, "always", false);
        assert!(o.color);
    }
}

--------------------------------------------------------------------------------
/src/registry.rs:
--------------------------------------------------------------------------------
use super::STORE;
use crate::errors::*;
use crate::output::{p2s, Output};
use crate::storepaths::StorePaths;
use crate::system::ExecutionContext;

use colored::Colorize;
use ignore::{self, DirEntry, WalkBuilder};
use std::collections::HashSet;
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::io;
use std::os::unix::fs::symlink;
use std::os::unix::prelude::*;
use std::path::{Path, PathBuf};
use std::result;
use std::sync::mpsc;
use users::get_current_username;

pub type GCRootsTx = mpsc::Sender<StorePaths>;
pub type
GCRootsRx = mpsc::Receiver; 23 | 24 | #[derive(Debug, Default)] 25 | pub struct GCRoots { 26 | prefix: PathBuf, // /nix/var/nix/gcroots/profiles/per-user/$USER 27 | topdir: PathBuf, // e.g., $PREFIX/srv/www if /srv/www was scanned 28 | cwd: PathBuf, // current dir when the scan was started 29 | todo: Vec, 30 | seen: HashSet, 31 | output: Output, 32 | } 33 | 34 | /// IPC endpoint for garbage collection roots registry 35 | pub trait Register { 36 | /// Receives stream of store paths via the `rx` channel. Returns on channel close. 37 | fn register_loop(&mut self, rx: GCRootsRx); 38 | 39 | /// Creates links for all registered store paths and cleans up unused ones. 40 | fn commit(&mut self, _ctx: &ExecutionContext) -> Result<()> { 41 | Ok(()) 42 | } 43 | } 44 | 45 | impl GCRoots { 46 | /// Creates Nix garbage collector handler, with `peruser` as user-level gc root (usually 47 | /// /nix/var/nix/gcroots/per-user) and `startdir` as initial scan dir (e.g., /home/user). 48 | pub fn new>(peruser: &str, startdir: P, output: &Output) -> Result { 49 | let user = match get_current_username() { 50 | Some(u) => u, 51 | None => return Err(UErr::WhoAmI), 52 | }; 53 | let prefix = Path::new(peruser).join(&user); 54 | let cwd = env::current_dir().map_err(UErr::CWD)?; 55 | Ok(GCRoots { 56 | topdir: prefix.join( 57 | startdir 58 | .as_ref() 59 | .strip_prefix("/") 60 | .map_err(|_| UErr::Relative)?, 61 | ), 62 | prefix, 63 | cwd, 64 | output: output.to_owned(), 65 | ..GCRoots::default() 66 | }) 67 | } 68 | } 69 | 70 | impl Register for GCRoots { 71 | fn register_loop(&mut self, rx: GCRootsRx) { 72 | for sp in rx { 73 | self.output.print_store_paths(&sp); 74 | self.todo.push(sp) 75 | } 76 | } 77 | 78 | fn commit(&mut self, ctx: &ExecutionContext) -> Result<()> { 79 | // Create `prefix` (/nix/var/nix/gcroots/per-user/$USER) on a best-effort basis before 80 | // dropping privileges. 
Failure may be or may be not a problem here, so defer error 81 | // handling to RegistryWorker::link later on. 82 | fs::create_dir(&self.prefix).ok(); 83 | nix::unistd::chown(&self.prefix, Some(ctx.uid), Some(ctx.gid)).ok(); 84 | ctx.with_dropped_privileges(|| { 85 | let mut worker = RegistryWorker::new(&self.prefix, &self.cwd); 86 | let cleaned = worker.cleanup(&self.topdir)?; 87 | let registered = self 88 | .todo 89 | .iter() 90 | .map(|sp| worker.register(sp)) 91 | .sum::>()?; 92 | info!( 93 | "{} references in {}", 94 | self.seen.len().to_string().cyan(), 95 | p2s(&self.topdir) 96 | ); 97 | if registered > 0 || cleaned > 0 { 98 | info!( 99 | "newly registered: {}, cleaned: {}", 100 | registered.to_string().green(), 101 | cleaned.to_string().purple() 102 | ); 103 | } 104 | Ok(()) 105 | }) 106 | } 107 | } 108 | 109 | fn extract_hash(path: &Path) -> &[u8] { 110 | &path.as_os_str().as_bytes()[..32] 111 | } 112 | 113 | #[derive(Debug)] 114 | pub struct RegistryWorker<'a> { 115 | prefix: &'a Path, 116 | cwd: &'a Path, 117 | seen: HashSet, 118 | } 119 | 120 | impl<'a> RegistryWorker<'a> { 121 | /// `prefix` - e.g. 
/nix/var/nix/gcroots/profiles/per-user/$USER 122 | /// `cwd` - directory where the scan was started 123 | fn new(prefix: &'a Path, cwd: &'a Path) -> Self { 124 | Self { 125 | prefix, 126 | cwd, 127 | seen: HashSet::new(), 128 | } 129 | } 130 | 131 | /// Removes dangling symlinks below `topdir` 132 | fn cleanup(&self, topdir: &Path) -> Result { 133 | if !topdir.exists() { 134 | return Ok(0); 135 | } 136 | WalkBuilder::new(topdir) 137 | .hidden(false) 138 | .ignore(false) 139 | .build() 140 | .map(|res: result::Result| { 141 | let dent = res?; 142 | let path = dent.path(); 143 | match dent.file_type() { 144 | Some(ft) if ft.is_dir() => { 145 | if fs::remove_dir(path).is_ok() { 146 | debug!("removing empty dir {}", path.display()) 147 | } 148 | Ok(0) 149 | } 150 | Some(ft) if ft.is_symlink() => { 151 | if self.seen.contains(path) { 152 | Ok(0) 153 | } else { 154 | info!("removing link {}", p2s(&path)); 155 | fs::remove_file(path)?; 156 | Ok(1) 157 | } 158 | } 159 | _ => Ok(0), 160 | } 161 | }) 162 | .sum() 163 | } 164 | 165 | /// Determines exactly where a GC link should live. 166 | fn gc_link_dir>(&self, scanned: P) -> PathBuf { 167 | let dir = scanned.as_ref().parent().unwrap_or_else(|| Path::new(".")); 168 | self.prefix 169 | .join(self.cwd.join(dir).strip_prefix("/").unwrap()) 170 | } 171 | 172 | fn create_link(&mut self, dir: &Path, linkname: PathBuf, target: &Path) -> Result { 173 | info!("creating link {}", p2s(&linkname)); 174 | fs::create_dir_all(dir).map_err(|e| UErr::Create(dir.to_owned(), e))?; 175 | symlink(target, &linkname).map_err(|e| UErr::Create(linkname.to_owned(), e))?; 176 | self.seen.insert(linkname); 177 | Ok(1) 178 | } 179 | 180 | /// Creates or updates a single GC link. 181 | /// 182 | /// `target` is assumed to be without leading `/nix/store/` prefix. 
183 | fn link, T: AsRef>(&mut self, dir: P, target: T) -> Result { 184 | let linkname = dir 185 | .as_ref() 186 | .join(&OsStr::from_bytes(extract_hash(target.as_ref()))); 187 | let target = Path::new(STORE).join(target); 188 | if self.seen.contains(&linkname) { 189 | return Ok(0); 190 | } 191 | match fs::read_link(&linkname) { 192 | Ok(ref p) => { 193 | if *p == *target { 194 | self.seen.insert(linkname); 195 | Ok(0) 196 | } else { 197 | fs::remove_file(&linkname).map_err(|e| UErr::Remove(linkname.to_owned(), e))?; 198 | self.create_link(dir.as_ref(), linkname, &target) 199 | } 200 | } 201 | Err(e) => match e.kind() { 202 | io::ErrorKind::NotFound => self.create_link(dir.as_ref(), linkname, &target), 203 | _ => Err(e).map_err(|e| UErr::ReadLink(linkname.to_owned(), e)), 204 | }, 205 | } 206 | } 207 | 208 | /// Registers all Nix store paths with the garbage collector. 209 | fn register(&mut self, sp: &StorePaths) -> Result { 210 | let dir = self.gc_link_dir(sp.path()); 211 | sp.iter_refs().map(|p| self.link(dir.as_path(), p)).sum() 212 | } 213 | } 214 | 215 | #[derive(Debug, Default)] 216 | pub struct NullGCRoots { 217 | output: Output, 218 | rx: Option, 219 | } 220 | 221 | impl NullGCRoots { 222 | pub fn new(output: &Output) -> Self { 223 | NullGCRoots { 224 | output: output.clone(), 225 | rx: None, 226 | } 227 | } 228 | } 229 | 230 | impl Register for NullGCRoots { 231 | fn register_loop(&mut self, rx: GCRootsRx) { 232 | for storepaths in rx { 233 | self.output.print_store_paths(&storepaths); 234 | } 235 | } 236 | } 237 | 238 | #[cfg(test)] 239 | pub mod tests { 240 | use super::*; 241 | use crate::tests::FIXTURES; 242 | 243 | use std::env; 244 | use std::fs::read_dir; 245 | use std::sync::mpsc::channel; 246 | use tempfile::TempDir; 247 | 248 | fn _gcroots() -> (TempDir, GCRoots) { 249 | let tempdir = TempDir::new().expect("failed to create gcroots tempdir"); 250 | let mut gc = GCRoots::new("/", Path::new("/"), &Output::default()).unwrap(); 251 | gc.prefix = 
tempdir.path().to_owned(); 252 | gc.topdir = PathBuf::from("/home/user/www"); 253 | gc.cwd = PathBuf::from("/home/user"); 254 | (tempdir, gc) 255 | } 256 | 257 | fn _worker(tempdir: &TempDir) -> RegistryWorker { 258 | RegistryWorker::new(tempdir.path(), Path::new("/home/user")) 259 | } 260 | 261 | fn is_symlink(p: &Path) -> bool { 262 | fs::symlink_metadata(p) 263 | .expect(&format!("symlink {} does not exist", p.display())) 264 | .file_type() 265 | .is_symlink() 266 | } 267 | 268 | #[test] 269 | fn linkdir() { 270 | let td = TempDir::new().unwrap(); 271 | let w = _worker(&td); 272 | assert_eq!(td.path().join("home/user"), w.gc_link_dir("file2")); 273 | assert_eq!( 274 | td.path().join("home/user/www/d"), 275 | w.gc_link_dir("/home/user/www/d/file1") 276 | ); 277 | assert_eq!(td.path().join("home/user/rel"), w.gc_link_dir("rel/file3")); 278 | } 279 | 280 | #[test] 281 | fn should_create_link() { 282 | let td = TempDir::new().unwrap(); 283 | let mut w = _worker(&td); 284 | let storepath = Path::new("gmy86w4020xzjw9s8qzzz0bgx8ldkhhk-e34kjk"); 285 | let expected = td.path().join("gmy86w4020xzjw9s8qzzz0bgx8ldkhhk"); 286 | assert_eq!(w.link(td.path(), storepath).expect("link 1 failed"), 1); 287 | assert!(is_symlink(&expected)); 288 | assert!(w.seen.contains(&expected)); 289 | // second attempt: do nothing 290 | assert_eq!(w.link(td.path(), storepath).expect("link 2 failed"), 0); 291 | } 292 | 293 | #[test] 294 | fn create_link_should_create_dir() { 295 | let td = TempDir::new().unwrap(); 296 | let mut w = _worker(&td); 297 | assert!(fs::metadata(td.path().join("d1")).is_err()); 298 | assert_eq!( 299 | w.link(td.path().join("d1"), "gmy86w4020xzjw9s8qzzz0bgx8ldkhhk-e") 300 | .unwrap(), 301 | 1 302 | ); 303 | assert!(fs::metadata(td.path().join("d1")) 304 | .expect("dir d1 not created") 305 | .is_dir()); 306 | } 307 | 308 | #[test] 309 | fn create_link_should_correct_existing_link() { 310 | let td = TempDir::new().unwrap(); 311 | let mut w = _worker(&td); 312 | let link = 
td.path().join("f0vdg3cb0005ksjb0fd5qs6f56zg2qs5"); 313 | symlink("changeme", &link).unwrap(); 314 | w.link(td.path(), "f0vdg3cb0005ksjb0fd5qs6f56zg2qs5-v") 315 | .unwrap(); 316 | assert_eq!( 317 | PathBuf::from("/nix/store/f0vdg3cb0005ksjb0fd5qs6f56zg2qs5-v"), 318 | fs::read_link(&link).unwrap() 319 | ); 320 | } 321 | 322 | #[test] 323 | fn cleanup_nonexistent_dir_should_succeed() { 324 | let td = TempDir::new().unwrap(); 325 | let w = _worker(&td); 326 | assert_eq!(w.cleanup(&td.path().join("no/such/dir")).unwrap(), 0); 327 | } 328 | 329 | #[test] 330 | fn should_create_links_no_earlier_than_in_commit() -> Result<()> { 331 | let (td, mut gc) = _gcroots(); 332 | let (tx, rx) = channel::(); 333 | let dent = ignore::Walk::new(td.path()).into_iter().next().unwrap()?; 334 | tx.send(StorePaths::new( 335 | dent, 336 | vec![ 337 | PathBuf::from("11111111111111111111111111111111-foo"), 338 | PathBuf::from("22222222222222222222222222222222-bar"), 339 | ], 340 | 1000, 341 | None, 342 | )) 343 | .unwrap(); 344 | drop(tx); 345 | 346 | let contents = |base: &Path| -> Vec { 347 | let mut paths = read_dir(base) 348 | .expect(&format!("failed to read_dir() {}", base.display())) 349 | .into_iter() 350 | .map(|e| e.unwrap().path()) 351 | .collect::>(); 352 | paths.sort(); 353 | paths 354 | }; 355 | 356 | gc.register_loop(rx); 357 | assert!( 358 | contents(td.path()).is_empty(), 359 | "register_loop() should not create links" 360 | ); 361 | 362 | gc.commit(&ExecutionContext::new())?; 363 | let base = td.path().join( 364 | env::temp_dir() 365 | .strip_prefix("/") 366 | .expect("env::temp_dir does not start with '/'"), 367 | ); 368 | assert_eq!( 369 | contents(&base), 370 | &[ 371 | PathBuf::from(base.join("11111111111111111111111111111111")), 372 | PathBuf::from(base.join("22222222222222222222222222222222")), 373 | ], 374 | ); 375 | Ok(()) 376 | } 377 | 378 | /* 379 | * passive GCRoots consumer to test walker/scanner 380 | */ 381 | 382 | #[derive(Debug)] 383 | pub struct FakeGCRoots 
{ 384 | pub registered: Vec, 385 | prefix: PathBuf, 386 | rx: Option, 387 | } 388 | 389 | impl FakeGCRoots { 390 | pub fn new(reldir: &Path) -> Self { 391 | FakeGCRoots { 392 | registered: Vec::new(), 393 | prefix: reldir.canonicalize().unwrap(), 394 | rx: None, 395 | } 396 | } 397 | } 398 | 399 | pub fn fake_gc() -> FakeGCRoots { 400 | FakeGCRoots::new(&*FIXTURES) 401 | } 402 | 403 | impl Register for FakeGCRoots { 404 | fn register_loop(&mut self, rx: GCRootsRx) { 405 | for storepaths in rx { 406 | for r in storepaths.refs() { 407 | let relpath = storepaths.path().strip_prefix(&self.prefix).unwrap(); 408 | self.registered 409 | .push(format!("{}|{}", relpath.display(), r.display())); 410 | } 411 | } 412 | } 413 | } 414 | } 415 | -------------------------------------------------------------------------------- /src/scan.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use crate::output::p2s; 3 | use crate::storepaths::StorePaths; 4 | 5 | use anyhow::Context; 6 | use anyhow::Result as AResult; 7 | use bytesize::ByteSize; 8 | use ignore::overrides::Override; 9 | use ignore::{DirEntry, Match}; 10 | use lazy_static::lazy_static; 11 | use memmap::Mmap; 12 | use probes::load; 13 | use regex::bytes::Regex; 14 | use std::ffi::OsStr; 15 | use std::fs; 16 | use std::io::Read; 17 | use std::os::unix::prelude::*; 18 | use std::path::PathBuf; 19 | use std::thread::sleep; 20 | use std::time::Duration; 21 | use zip::read::ZipArchive; 22 | 23 | lazy_static! { 24 | static ref STORE_RE: Regex = 25 | Regex::new(r"(?-u)/nix/store/([0-9a-z]{32}-[0-9a-zA-Z+._?=-]+)").unwrap(); 26 | } 27 | 28 | const MIN_STOREREF_LEN: u64 = 45; 29 | 30 | struct ScanResult { 31 | refs: Vec, 32 | meta: fs::Metadata, 33 | bytes_scanned: u64, 34 | } 35 | 36 | #[derive(Debug, Clone)] 37 | pub struct Scanner { 38 | /// Skips the rest of a file if there is no Nix store reference in the first QUICKCHECK bytes. 
quickcheck: ByteSize,
    /// Unzips files matched by the given globs and scans inside.
    unzip: Override,
    /// Pauses scanning if the current load1 is higher than this
    max_load: f32,
}

impl Default for Scanner {
    /// No quickcheck limit, no unzip globs, no load-based pausing.
    fn default() -> Self {
        Scanner {
            quickcheck: ByteSize::b(0),
            unzip: Override::empty(),
            max_load: 0.0,
        }
    }
}

/// Scans a regular file.
///
/// Only the first `quickcheck` bytes are considered. The whole file is read if `quickcheck` is 0.
///
/// More precisely: if the file is longer than `quickcheck` and its first `quickcheck`
/// bytes contain no `/nix/store/`, the scan stops there; otherwise the whole file is
/// searched for store references.
fn scan_regular_quickcheck(
    dent: &DirEntry,
    meta: fs::Metadata,
    quickcheck: u64,
) -> AResult<ScanResult> {
    debug!("Scanning {}", dent.path().display());
    let mmap = unsafe { Mmap::map(&fs::File::open(dent.path())?)? };
    if quickcheck > 0
        && meta.len() > quickcheck
        && twoway::find_bytes(&mmap[0..(quickcheck as usize)], b"/nix/store/").is_none()
    {
        return Ok(ScanResult {
            refs: vec![],
            meta,
            bytes_scanned: quickcheck,
        });
    }
    let bytes_scanned = meta.len();
    Ok(ScanResult {
        refs: STORE_RE
            .captures_iter(&mmap)
            .map(|cap| OsStr::from_bytes(&cap[1]).into())
            .collect(),
        meta,
        bytes_scanned,
    })
}

/// Scans a regular file, skipping files too short to hold a single store reference.
fn scan_regular(dent: &DirEntry, quickcheck: ByteSize) -> AResult<ScanResult> {
    let meta = dent.metadata()?;
    if meta.len() < MIN_STOREREF_LEN {
        // minimum length to fit a single store reference not reached
        let bytes_scanned = meta.len();
        Ok(ScanResult {
            refs: vec![],
            meta,
            bytes_scanned,
        })
    } else {
        scan_regular_quickcheck(dent, meta, quickcheck.as_u64())
    }
}

/// Unpacks a ZIP archive on the fly and scans its contents.
///
/// Each archive member is read completely into memory and scanned on its own.
/// `bytes_scanned` reports the size of the archive file, not the unpacked size.
fn scan_zip_archive(dent: &DirEntry) -> AResult<ScanResult> {
    debug!("Scanning ZIP archive {}", dent.path().display());
    let meta = dent.metadata()?;
    let mut archive = match ZipArchive::new(fs::File::open(&dent.path())?) {
        Ok(a) => a,
        Err(e) => return Err(UErr::ZIP(dent.path().to_owned(), e).into()),
    };
    let mut buf = Vec::new();
    let mut refs = Vec::new();
    if archive.len() > 1000 || meta.len() > 2 << 20 {
        warn!(
            "{}: unpacking large ZIP archives may be slow",
            p2s(dent.path())
        );
    }
    for i in 0..archive.len() {
        let mut f = archive
            .by_index(i)
            .map_err(|e| UErr::ZIP(dent.path().to_owned(), e))?;
        // `read_to_end` appends, so drop the previous member first. Otherwise every
        // member would be rescanned together with all earlier ones.
        buf.clear();
        f.read_to_end(&mut buf)?;
        refs.extend(
            STORE_RE
                .captures_iter(&buf)
                .map(|cap| OsStr::from_bytes(&cap[1]).into()),
        );
    }
    let bytes_scanned = meta.len();
    Ok(ScanResult {
        refs,
        meta,
        bytes_scanned,
    })
}

/// Scans the symlink's target name (i.e., readlink() output).
///
/// At most one reference (the first match) is returned.
fn scan_symlink(dent: &DirEntry) -> AResult<ScanResult> {
    debug!("Scanning link {}", dent.path().display());
    let meta = dent.metadata()?;
    let target = fs::read_link(dent.path())?;
    let len = target.as_os_str().len() as u64;
    let refs = match STORE_RE.captures(target.as_os_str().as_bytes()) {
        Some(cap) => vec![OsStr::from_bytes(&cap[1]).into()],
        None => vec![],
    };
    Ok(ScanResult {
        refs,
        meta,
        bytes_scanned: len,
    })
}

impl Scanner {
    pub fn new(quickcheck: ByteSize, unzip: Override, max_load: f32) -> Self {
        Scanner {
            quickcheck,
            unzip,
            max_load,
        }
    }

    /// Scans a thing that has a file type.
    ///
    /// Returns Some(result) if a scan strategy was found, None otherwise.
    fn scan_inode(&self, dent: &DirEntry, ft: fs::FileType) -> Option<AResult<ScanResult>> {
        if ft.is_file() {
            if !self.unzip.is_empty() {
                if let Match::Whitelist(_) = self.unzip.matched(dent.path(), false) {
                    return Some(scan_zip_archive(dent));
                }
            }
            return Some(scan_regular(dent, self.quickcheck));
        }
        if ft.is_symlink() {
            return Some(scan_symlink(dent));
        }
        None
    }

    /// Decodes the DirEntry and scans it if feasible.
    ///
    /// Non-partial walk errors attached to `dent` are returned as errors; entries
    /// without a usable file type yield `UErr::FiletypeUnknown`.
    fn scan(&self, dent: &DirEntry) -> AResult<ScanResult> {
        match dent.error() {
            Some(e) if !e.is_partial() => {
                return Err(e.clone())
                    .with_context(|| format!("{}: metadata error", p2s(dent.path())))
            }
            _ => (),
        }
        if let Some(ft) = dent.file_type() {
            if let Some(res) = self.scan_inode(dent, ft) {
                return res.with_context(|| format!("{}: scan failed", p2s(dent.path())));
            }
        }
        // fall-through: no idea how to handle this DirEntry
        Err(UErr::FiletypeUnknown.into())
    }

    /// Blocks until the 1-minute load is at or below `max_load`.
    ///
    /// Sleeps between checks, starting with 1 s and growing the interval by 10 % per
    /// round until it has reached 30 s. Panics if the load cannot be read.
    fn pause_for_load(&self) {
        let mut backoff = Duration::new(1, 0);
        loop {
            let l1 = load::read().expect("determine system load").one;
            if l1 <= self.max_load {
                return;
            }
            debug!("Pausing until load {} is below {}", l1, self.max_load);
            sleep(backoff);
            if backoff.as_secs() < 30 {
                backoff = backoff * 11 / 10;
            }
        }
    }

    /// Scans `dent` and transforms results into a StorePaths object.
    ///
    /// References are sorted and deduplicated. If `max_load` is positive, the call
    /// first waits for the system load to drop.
    pub fn find_paths(&self, dent: DirEntry) -> AResult<StorePaths> {
        if self.max_load > 0.0 {
            self.pause_for_load();
        }
        self.scan(&dent).map(|mut r| {
            r.refs.sort();
            r.refs.dedup();
            StorePaths::new(dent, r.refs, r.bytes_scanned, Some(r.meta))
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::{assert_eq_vecs, dent, FIXTURES};
    use ignore::overrides::OverrideBuilder;
    use std::path::Path;

    #[test]
    fn should_not_look_further_than_quickcheck() {
        let mut scanner = Scanner::default();
        assert_eq_vecs(
            scanner
                .find_paths(dent("dir2/lftp.offset"))
                .unwrap()
                .refs()
                .to_vec(),
            |path| path.to_string_lossy().into_owned(),
            &["q3wx1gab2ysnk5nyvyyg56ana2v4r2ar-glibc-2.24"],
        );
        scanner.quickcheck = ByteSize::kib(4);
        assert_eq_vecs(
            scanner
                .find_paths(dent("dir2/lftp.offset"))
                .unwrap()
                .refs()
                .to_vec(),
            |path| path.to_string_lossy().into_owned(),
            &[],
        );
    }

    #[test]
    fn should_unpack_eggs() {
        let sp = Scanner::default()
            .find_paths(dent("miniegg-1-py3.5.egg"))
            .unwrap();
        assert!(sp.refs().is_empty());

        let unzip = OverrideBuilder::new(&*FIXTURES)
            .add("*.egg")
            .unwrap()
            .build()
            .unwrap();
        let sp = Scanner::new(ByteSize::default(), unzip, 0.0)
            .find_paths(dent("miniegg-1-py3.5.egg"))
            .unwrap();
        assert_eq!(
            vec![Path::new("76lhp1gvc3wbl6q4p2qgn2n7245imyvr-perl-5.22.3")],
            *sp.refs()
        );
        assert_eq!(2226, sp.bytes_scanned());
    }
}

--------------------------------------------------------------------------------
/src/statistics.rs:
--------------------------------------------------------------------------------
use crate::output::{d2s, p2s};
use crate::storepaths::StorePaths;
use atty::{self,
Stream};
use bytesize::ByteSize;
use colored::Colorize;
use std::collections::HashMap;
use std::ffi::OsString;
use std::hash::Hash;
use std::ops::{Add, AddAssign};
use std::path::Path;
use std::sync::mpsc;
use std::sync::mpsc::channel;
use std::time;

pub type StatsTx = mpsc::Sender<StatsMsg>;

/// Counter pair: number of files and number of bytes.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Pair {
    files: usize,
    bytes: u64,
}

impl Pair {
    #[allow(unused)]
    fn new(files: usize, bytes: u64) -> Self {
        Pair { files, bytes }
    }
}

impl Add<u64> for Pair {
    type Output = Self;

    /// Accounts one more file of `inc` bytes.
    fn add(self, inc: u64) -> Pair {
        Pair {
            files: self.files + 1,
            bytes: self.bytes + inc,
        }
    }
}

impl AddAssign<u64> for Pair {
    /// Accounts one more file of `inc` bytes in place.
    fn add_assign(&mut self, inc: u64) {
        self.files += 1;
        self.bytes += inc;
    }
}

#[derive(Debug, Clone)]
pub enum StatsMsg {
    SoftError,
    Scan(File),
}

/// Per-file statistics record: bytes scanned and file extension.
#[derive(Debug, Clone)]
pub struct File {
    scanned: u64,
    ext: OsString,
}

impl<'a> From<&'a StorePaths> for File {
    /// Files without extension are recorded with an empty extension.
    fn from(sp: &'a StorePaths) -> Self {
        let ext = match sp.path().extension() {
            Some(ext) => ext.to_os_string(),
            None => OsString::from(""),
        };
        File {
            scanned: sp.bytes_scanned(),
            ext,
        }
    }
}

/// Converts `map` into a vector of `(files, bytes, key)` tuples, sorted by descending
/// file count and truncated to `cutoff` elements.
///
/// Entries with equal file count are ordered by descending byte count, so the result
/// does not depend on the hash map's iteration order in that case.
fn map2vec<T>(map: &HashMap<T, Pair>, cutoff: usize) -> Vec<(usize, u64, T)>
where
    T: Eq + Hash + Clone,
{
    let mut res = map
        .iter()
        .map(|e| {
            let (k, p): (&T, &Pair) = e;
            (p.files, p.bytes, k.clone())
        })
        .collect::<Vec<_>>();
    res.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| b.1.cmp(&a.1)));
    res.truncate(cutoff);
    res
}

#[derive(Debug)]
pub struct Statistics {
    pub softerrors: usize,
    pub total: Pair,
    by_ext: HashMap<OsString, Pair>,
    rx: Option<mpsc::Receiver<StatsMsg>>,
    start: time::Instant,
    detailed: bool,
    progress: bool,
    progress_last: u64,
}

/// Elapsed seconds that must be exceeded before the first progress line is shown.
const SHOW_NOT_BEFORE: u64 = 5;

impl Statistics {
    /// Creates an empty statistics collector.
    ///
    /// Per-extension accounting is only done if `detailed` is set. Progress output is
    /// enabled unless `quiet` is set or stderr is not a terminal.
    pub fn new(detailed: bool, quiet: bool) -> Self {
        Statistics {
            softerrors: 0,
            total: Pair::default(),
            by_ext: HashMap::new(),
            rx: None,
            start: time::Instant::now(),
            detailed,
            progress: !quiet && atty::is(Stream::Stderr),
            progress_last: SHOW_NOT_BEFORE,
        }
    }

    pub fn softerrors(&self) -> usize {
        self.softerrors
    }

    /// Accounts a single statistics message.
    fn process(&mut self, msg: StatsMsg) {
        match msg {
            StatsMsg::Scan(f) => {
                self.total += f.scanned;
                if self.detailed {
                    let by_ext = self.by_ext.entry(f.ext).or_insert_with(Pair::default);
                    *by_ext += f.scanned;
                }
            }
            StatsMsg::SoftError => self.softerrors += 1,
        }
    }

    /// Rewrites the progress line on stderr whenever the elapsed whole seconds exceed
    /// the value at the last update.
    fn print_progress(&mut self) {
        let elapsed = self.start.elapsed().as_secs();
        if elapsed > self.progress_last {
            let p = format!(
                "Scanning in progress... {} files ({} read) in {} s ",
                self.total.files,
                ByteSize::b(self.total.bytes),
                elapsed
            );
            eprint!("\r{}", p.purple());
            self.progress_last = elapsed;
        }
    }

    /// Processes messages until the channel created by `tx()` is closed.
    ///
    /// Does nothing if `tx()` has not been called before.
    pub fn receive_loop(&mut self) {
        if let Some(rx) = self.rx.take() {
            for msg in rx {
                self.process(msg);
                if self.progress {
                    self.print_progress();
                }
            }
            // terminate the progress line if one has been printed
            if self.progress && self.progress_last > SHOW_NOT_BEFORE {
                eprintln!()
            }
        };
    }

    /// Creates the message channel and returns its sending end.
    pub fn tx(&mut self) -> StatsTx {
        let (tx, rx) = channel::<StatsMsg>();
        self.rx = Some(rx);
        tx
    }

    /// Prints the most frequently scanned file extensions to stdout.
    ///
    /// Files without extension are not listed. Nothing is printed if at most one
    /// extension has been recorded.
    pub fn print_details(&self) {
        if self.by_ext.len() <= 1 {
            return;
        }
        println!(
            "Top 10 scanned file extensions:\n\
             extension  #files  read"
        );
        // Fetch one extra entry so that ten rows remain after the empty extension
        // has been filtered out.
        let top = map2vec(&self.by_ext, 11)
            .into_iter()
            .filter(|(_, _, ext)| !ext.is_empty())
            .take(10);
        for (files, bytes, ext) in top {
            println!(
                "{:-10} {:6}  {}",
                ext.to_string_lossy(),
                files,
                ByteSize::b(bytes)
            );
        }
        println!();
    }

    /// Logs totals and the final status line for `startdir`.
    ///
    /// The final line is a warning if soft errors have been counted.
    pub fn log_summary<P: AsRef<Path>>(&self, startdir: P) {
        let elapsed = self.start.elapsed();
        info!(
            "Processed {} files ({} read) in {:5.5}{}",
            self.total.files.to_string().cyan(),
            ByteSize::b(self.total.bytes),
            d2s(elapsed).to_string().cyan(),
            " s".cyan()
        );
        if self.detailed {
            self.print_details()
        }
        let dir = p2s(startdir.as_ref());
        if self.softerrors > 0 {
            warn!(
                "{}: Finished {} with {} soft error(s)",
                crate_name!(),
                dir,
                self.softerrors
            );
        } else {
            info!("{}: Finished {}", crate_name!(), dir);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::assert_eq_vecs;

    fn _msg_read(bytes: u64, ext: &str) -> StatsMsg {
        StatsMsg::Scan(File {
            scanned: bytes,
            ext: ext.into(),
        })
    }

    #[test]
    fn add_single_item_with_details() {
        let mut s = Statistics::new(true, false);
        s.process(_msg_read(3498, "jpg"));
        assert_eq!(s.total, Pair::new(1, 3498));
        assert_eq!(s.by_ext.len(), 1);
    }

    #[test]
    fn add_single_item_no_details() {
        let mut s = Statistics::new(false, false);
        s.process(_msg_read(3498, "jpg"));
        assert_eq!(s.by_ext.len(), 0);
    }

    #[test]
    fn add_softerrors() {
        let mut s = Statistics::new(false, false);
        s.process(StatsMsg::SoftError);
        s.process(StatsMsg::SoftError);
        s.process(StatsMsg::SoftError);
        assert_eq!(3, s.softerrors);
    }

    #[test]
    fn account_extensions() {
        let mut s = Statistics::new(true, false);
        s.process(_msg_read(45, "png"));
        s.process(_msg_read(21, "jpg"));
        s.process(_msg_read(85, "png"));
        assert_eq_vecs(
            s.by_ext.iter().collect::<Vec<_>>(),
            |v| format!("{:?} {} {}", v.0, v.1.files, v.1.bytes),
            &["\"png\" 2 130", "\"jpg\" 1 21"],
        );
    }

    #[test]
    fn map2vec_extensions() {
        let mut s = Statistics::new(true, false);
        s.process(_msg_read(45, "png"));
        s.process(_msg_read(21, "jpg"));
        s.process(_msg_read(85, "png"));
        assert_eq!(
            map2vec(&s.by_ext, 2),
            vec![
                (2, 130, OsString::from("png")),
                (1, 21, OsString::from("jpg")),
            ]
        );
    }

    #[test]
    fn map2vec_cutoff() {
        let mut s = Statistics::new(true, false);
        s.process(_msg_read(95, "png"));
        s.process(_msg_read(31, "png"));
        s.process(_msg_read(21, "jpg"));
        s.process(_msg_read(305, "txt"));
        assert_eq!(map2vec(&s.by_ext, 1), vec![(2, 126, OsString::from("png"))]);
    }
}

--------------------------------------------------------------------------------
/src/storepaths/cache.rs:
--------------------------------------------------------------------------------
//!
File-based cache for `DirEntry` structures. 2 | //! 3 | //! The cache persists scan results between `userscan` invocations so that unchanged files don't 4 | //! need to be scanned again. It is currently saved as compressed MessagePack file. 5 | 6 | use super::{Lookup, StorePaths}; 7 | use crate::cachemap::*; 8 | use crate::errors::*; 9 | use crate::output::p2s; 10 | use crate::system::ExecutionContext; 11 | use colored::Colorize; 12 | use ignore::DirEntry; 13 | use std::fs; 14 | use std::os::unix::prelude::*; 15 | use std::path::{Path, PathBuf}; 16 | use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; 17 | use std::sync::RwLock; 18 | 19 | #[derive(Debug, Default)] 20 | pub struct Cache { 21 | map: RwLock, 22 | filename: PathBuf, 23 | file: Option, 24 | dirty: AtomicBool, 25 | hits: AtomicUsize, 26 | misses: AtomicUsize, 27 | limit: usize, 28 | } 29 | 30 | impl Cache { 31 | pub fn new(limit: Option) -> Self { 32 | Cache { 33 | limit: limit.unwrap_or(0), 34 | ..Self::default() 35 | } 36 | } 37 | 38 | pub fn open>(mut self, path: P, ctx: &ExecutionContext) -> Result { 39 | self.filename = path.as_ref().to_path_buf(); 40 | info!("Loading cache {}", p2s(&self.filename)); 41 | self.file = ctx.with_dropped_privileges::<_, UErr, _>(|| { 42 | if let Some(p) = path.as_ref().parent() { 43 | fs::create_dir_all(p).map_err(|e| UErr::Create(p.to_owned(), e))?; 44 | } 45 | let mut cachefile = 46 | open_locked(&path).map_err(|e| UErr::LoadCache(self.filename.clone(), e))?; 47 | if cachefile.metadata().map_err(UErr::from)?.len() > 0 { 48 | let map = CacheMap::load(&mut cachefile, &self.filename) 49 | .map_err(|e| UErr::LoadCache(self.filename.clone(), e))?; 50 | debug!("loaded {} entries from cache", map.len()); 51 | self.map = RwLock::new(map); 52 | self.dirty = AtomicBool::new(false); 53 | } else { 54 | debug!("creating new cache {}", p2s(&path)); 55 | self.map.write().expect("tainted lock").clear(); 56 | self.dirty = AtomicBool::new(true); 57 | } 58 | Ok(Some(cachefile)) 
59 | })?; 60 | Ok(self) 61 | } 62 | 63 | pub fn commit(&mut self, ctx: &ExecutionContext) -> Result<()> { 64 | if let Some(ref mut file) = self.file { 65 | if !self.dirty.compare_and_swap(true, false, Ordering::SeqCst) { 66 | return Ok(()); 67 | } 68 | ctx.drop_privileges()?; 69 | let mut map = self.map.write().expect("tainted lock"); 70 | map.retain(|_, ref mut v| v.used); 71 | debug!("writing {} entries to cache", map.len()); 72 | map.save(file) 73 | .map_err(|e| UErr::SaveCache(self.filename.clone(), e))?; 74 | ctx.regain_privileges()?; 75 | } 76 | Ok(()) 77 | } 78 | 79 | fn get(&self, dent: &DirEntry) -> Option<(Vec, fs::Metadata)> { 80 | let ino = dent.ino()?; 81 | let mut map = self.map.write().expect("tainted lock"); 82 | let c = map.get_mut(&ino)?; 83 | let meta = dent.metadata().ok()?; 84 | if c.ctime == meta.ctime() && c.ctime_nsec == meta.ctime_nsec() as u8 { 85 | c.used = true; 86 | Some((c.refs.clone(), meta)) 87 | } else { 88 | None 89 | } 90 | } 91 | 92 | pub fn lookup(&self, dent: DirEntry) -> Lookup { 93 | if let Some(ft) = dent.file_type() { 94 | if ft.is_dir() { 95 | return Lookup::Dir(StorePaths { 96 | dent, 97 | refs: vec![], 98 | cached: true, 99 | bytes_scanned: 0, 100 | metadata: None, 101 | }); 102 | } 103 | } 104 | match self.get(&dent) { 105 | Some((refs, metadata)) => { 106 | self.hits.fetch_add(1, Ordering::Relaxed); 107 | Lookup::Hit(StorePaths { 108 | dent, 109 | refs, 110 | cached: true, 111 | bytes_scanned: 0, 112 | metadata: Some(metadata), 113 | }) 114 | } 115 | None => { 116 | self.misses.fetch_add(1, Ordering::Relaxed); 117 | Lookup::Miss(dent) 118 | } 119 | } 120 | } 121 | 122 | pub fn insert(&self, sp: &mut StorePaths) -> Result<()> { 123 | if sp.cached { 124 | return Ok(()); 125 | } 126 | let meta = sp.metadata()?; 127 | let mut map = self.map.write().expect("tainted lock"); 128 | if self.limit > 0 && map.len() >= self.limit { 129 | return Err(UErr::CacheFull(self.limit)); 130 | } 131 | map.insert( 132 | sp.ino()?, 133 | 
CacheLine::new(meta.ctime(), meta.ctime_nsec() as u8, &sp.refs), 134 | ); 135 | self.dirty.store(true, Ordering::Release); 136 | Ok(()) 137 | } 138 | 139 | /* statistics */ 140 | 141 | pub fn len(&self) -> usize { 142 | self.map.read().expect("tainted lock").len() 143 | } 144 | 145 | pub fn hit_ratio(&self) -> f32 { 146 | let h = self.hits.load(Ordering::SeqCst); 147 | let m = self.misses.load(Ordering::SeqCst); 148 | if h == 0 { 149 | 0.0 150 | } else { 151 | h as f32 / (h as f32 + m as f32) 152 | } 153 | } 154 | 155 | pub fn log_statistics(&self) { 156 | if self.file.is_some() { 157 | info!( 158 | "Cache saved to {}, {} entries, hit ratio {}%", 159 | p2s(&self.filename), 160 | self.len().to_string().cyan(), 161 | ((self.hit_ratio() * 100.0) as u32).to_string().cyan() 162 | ) 163 | } 164 | } 165 | } 166 | 167 | #[cfg(test)] 168 | mod tests { 169 | use super::Lookup::*; 170 | use super::*; 171 | use crate::tests::{dent, FIXTURES}; 172 | use std::fs; 173 | use tempfile::TempDir; 174 | 175 | fn sp_dummy() -> StorePaths { 176 | let dent = tests::dent("dir2/lftp"); 177 | StorePaths { 178 | dent, 179 | refs: vec![PathBuf::from("q3wx1gab2ysnk5nyvyyg56ana2v4r2ar-glibc-2.24")], 180 | cached: false, 181 | bytes_scanned: 0, 182 | metadata: None, 183 | } 184 | } 185 | 186 | fn sp_fixture>(path: P) -> StorePaths { 187 | StorePaths { 188 | dent: tests::dent(path), 189 | refs: vec![], 190 | cached: false, 191 | bytes_scanned: 0, 192 | metadata: None, 193 | } 194 | } 195 | 196 | #[test] 197 | fn insert_cacheline() { 198 | let c = Cache::new(None); 199 | c.insert(&mut sp_fixture("dir1/proto-http.la")) 200 | .expect("insert failed"); 201 | 202 | let dent = tests::dent("dir1/proto-http.la"); 203 | let map = c.map.read().unwrap(); 204 | let entry = map 205 | .get(&dent.ino().unwrap()) 206 | .expect("cache entry not found"); 207 | assert_eq!( 208 | entry.ctime, 209 | fs::metadata("dir1/proto-http.la").unwrap().ctime() 210 | ); 211 | } 212 | 213 | #[test] 214 | fn 
insert_should_fail_on_limit() { 215 | let c = Cache::new(Some(2)); 216 | c.insert(&mut sp_fixture("dir1/proto-http.la")).expect("ok"); 217 | c.insert(&mut sp_fixture("dir2/lftp")).expect("ok"); 218 | assert!(c.insert(&mut sp_fixture("dir2/lftp.offset")).is_err()); 219 | } 220 | 221 | #[test] 222 | fn lookup_should_miss_on_changed_metadata() { 223 | let c = Cache::new(None); 224 | let ino = tests::dent("dir2/lftp").ino().unwrap(); 225 | c.insert(&mut sp_dummy()).expect("insert failed"); 226 | 227 | match c.lookup(tests::dent("dir2/lftp")) { 228 | Hit(sp) => assert_eq!( 229 | vec![PathBuf::from("q3wx1gab2ysnk5nyvyyg56ana2v4r2ar-glibc-2.24")], 230 | sp.refs 231 | ), 232 | _ => panic!("test failure: did not find dir2/lftp in cache"), 233 | } 234 | 235 | c.map.write().unwrap().get_mut(&ino).unwrap().ctime = 6674; 236 | match c.lookup(tests::dent("dir2/lftp")) { 237 | Miss(_) => (), 238 | _ => panic!("should not hit: dir2/lftp"), 239 | } 240 | } 241 | 242 | #[test] 243 | fn load_save_cache() { 244 | let td = TempDir::new().unwrap(); 245 | let cache_file = td.path().join("cache.mp"); 246 | fs::copy(FIXTURES.join("cache.mp"), &cache_file).unwrap(); 247 | let mut c = Cache::new(None) 248 | .open(&cache_file, &ExecutionContext::new()) 249 | .unwrap(); 250 | assert_eq!(12, c.len()); 251 | assert!(!c.dirty.load(Ordering::SeqCst)); 252 | for ref cl in c.map.read().unwrap().values() { 253 | assert!(!cl.used); 254 | } 255 | 256 | c.insert(&mut sp_dummy()).unwrap(); 257 | assert!(c.dirty.load(Ordering::SeqCst)); 258 | // exactly the newly inserted cacheline should have the "used" flag set 259 | assert_eq!( 260 | 1, 261 | c.map 262 | .read() 263 | .unwrap() 264 | .values() 265 | .filter(|cl| cl.used) 266 | .collect::>() 267 | .len() 268 | ); 269 | 270 | c.commit(&ExecutionContext::new()).unwrap(); 271 | assert_eq!(1, c.len()); 272 | let cache_len = fs::metadata(&cache_file).unwrap().len(); 273 | assert!(cache_len > 60); 274 | } 275 | } 276 | 
-------------------------------------------------------------------------------- /src/storepaths/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use ignore::{self, DirEntry}; 3 | use std::fmt; 4 | use std::fs; 5 | use std::path::{Path, PathBuf}; 6 | 7 | mod cache; 8 | pub use self::cache::Cache; 9 | 10 | #[derive(Debug)] 11 | pub struct StorePaths { 12 | dent: DirEntry, 13 | refs: Vec, 14 | cached: bool, 15 | bytes_scanned: u64, 16 | metadata: Option, 17 | } 18 | 19 | impl StorePaths { 20 | pub fn new( 21 | dent: DirEntry, 22 | refs: Vec, 23 | bytes_scanned: u64, 24 | metadata: Option, 25 | ) -> Self { 26 | StorePaths { 27 | dent, 28 | refs, 29 | bytes_scanned, 30 | cached: false, 31 | metadata, 32 | } 33 | } 34 | 35 | #[inline] 36 | pub fn path(&self) -> &Path { 37 | self.dent.path() 38 | } 39 | 40 | #[inline] 41 | pub fn error(&self) -> Option<&ignore::Error> { 42 | self.dent.error() 43 | } 44 | 45 | #[inline] 46 | pub fn ino(&self) -> Result { 47 | self.dent 48 | .ino() 49 | .ok_or_else(|| UErr::DentNoMetadata(self.path().to_owned())) 50 | } 51 | 52 | pub fn metadata(&mut self) -> Result { 53 | match self.metadata { 54 | Some(ref m) => Ok(m.clone()), 55 | None => { 56 | let m = self 57 | .dent 58 | .metadata() 59 | .map_err(|_| UErr::DentNoMetadata(self.path().to_owned()))?; 60 | self.metadata = Some(m.clone()); 61 | Ok(m) 62 | } 63 | } 64 | } 65 | 66 | #[inline] 67 | pub fn is_empty(&self) -> bool { 68 | self.refs.is_empty() 69 | } 70 | 71 | #[inline] 72 | pub fn iter_refs<'a>(&'a self) -> Box + 'a> { 73 | Box::new(self.refs.iter().map(|p| p.as_path())) 74 | } 75 | 76 | #[allow(dead_code)] // only used in tests 77 | pub fn refs(&self) -> &Vec { 78 | &self.refs 79 | } 80 | 81 | #[inline] 82 | pub fn bytes_scanned(&self) -> u64 { 83 | self.bytes_scanned 84 | } 85 | } 86 | 87 | impl fmt::Display for StorePaths { 88 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 89 | if 
self.refs.is_empty() { 90 | write!(f, "{}", self.dent.path().display()) 91 | } else { 92 | write!(f, "{}:", self.dent.path().display())?; 93 | for r in &self.refs { 94 | write!(f, " {}", r.display())?; 95 | } 96 | Ok(()) 97 | } 98 | } 99 | } 100 | 101 | pub enum Lookup { 102 | Dir(StorePaths), 103 | Hit(StorePaths), 104 | Miss(DirEntry), 105 | } 106 | -------------------------------------------------------------------------------- /src/system.rs: -------------------------------------------------------------------------------- 1 | use nix::unistd::{getegid, geteuid, getgid, getuid, setegid, seteuid, Gid, Uid}; 2 | use std::error::Error; 3 | 4 | #[derive(Debug, Clone)] 5 | pub struct ExecutionContext { 6 | pub is_suid: bool, 7 | pub is_sgid: bool, 8 | pub uid: Uid, 9 | pub euid: Uid, 10 | pub gid: Gid, 11 | pub egid: Gid, 12 | } 13 | 14 | impl Default for ExecutionContext { 15 | fn default() -> Self { 16 | Self::new() 17 | } 18 | } 19 | 20 | impl ExecutionContext { 21 | pub fn new() -> Self { 22 | Self { 23 | is_suid: getuid() != geteuid(), 24 | is_sgid: getgid() != getegid(), 25 | uid: getuid(), 26 | euid: geteuid(), 27 | gid: getgid(), 28 | egid: getegid(), 29 | } 30 | } 31 | 32 | pub fn drop_privileges(&self) -> Result<(), nix::Error> { 33 | debug!("Dropping privileges -> {}/{}", self.uid, self.gid); 34 | if self.is_suid { 35 | seteuid(self.uid)?; 36 | } 37 | if self.is_sgid { 38 | setegid(self.gid)?; 39 | } 40 | Ok(()) 41 | } 42 | 43 | pub fn regain_privileges(&self) -> Result<(), nix::Error> { 44 | debug!("Regaining privileges -> {}/{}", self.euid, self.egid); 45 | if self.is_suid { 46 | seteuid(self.euid)?; 47 | } 48 | if self.is_sgid { 49 | setegid(self.egid)?; 50 | } 51 | Ok(()) 52 | } 53 | 54 | /// Convenience helper which brackets a closure with drop/regain privileges 55 | pub fn with_dropped_privileges(&self, unprivileged: F) -> Result 56 | where 57 | E: Error + From, 58 | F: FnOnce() -> Result, 59 | { 60 | self.drop_privileges()?; 61 | let res = 
unprivileged(); 62 | self.regain_privileges()?; 63 | res 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use nix::unistd::chdir; 3 | use std::collections::HashSet; 4 | use std::path::{Path, PathBuf}; 5 | 6 | lazy_static! { 7 | pub static ref FIXTURES: PathBuf = Path::new(env!("CARGO_MANIFEST_DIR")).join("fixtures"); 8 | } 9 | 10 | /// Quick creation of an App instance for testing. 11 | pub fn app>(startdir: P) -> App { 12 | chdir(&*FIXTURES).expect("chdir(fixtures) failed"); 13 | let mut a = App::default(); 14 | a.opt.unzip = vec!["*.zip".into()]; 15 | a.opt.startdir = PathBuf::from(startdir.as_ref()); 16 | a 17 | } 18 | 19 | /// Quick creation of an DirEntry instance for testing. 20 | pub fn dent>(path: P) -> ignore::DirEntry { 21 | app(&path) 22 | .walker() 23 | .unwrap() 24 | .build() 25 | .next() 26 | .unwrap_or_else(|| panic!("didn't find path: {}", path.as_ref().display())) 27 | .unwrap_or_else(|e| panic!("unable to read path: {}", e)) 28 | } 29 | 30 | /// Tests equality of two Vecs with verbose diff reporting. 
31 | pub fn assert_eq_vecs(result: Vec, map_res: F, expect: &[&str]) 32 | where 33 | F: for<'a> Fn(&'a R) -> String, 34 | { 35 | let mut expected: HashSet<&str> = expect.into_iter().map(|p| *p).collect(); 36 | let mut unexpected = Vec::new(); 37 | for r in result { 38 | let key = map_res(&r); 39 | if !expected.remove(&*key) { 40 | unexpected.push(key); 41 | } 42 | } 43 | if !unexpected.is_empty() { 44 | panic!("unexpected results: {:?}", unexpected); 45 | } 46 | if !expected.is_empty() { 47 | panic!("missing expected results: {:?}", expected); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/walk.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::UErr; 2 | use crate::output::p2s; 3 | use crate::registry::{GCRootsTx, Register}; 4 | use crate::scan::Scanner; 5 | use crate::statistics::{Statistics, StatsMsg, StatsTx}; 6 | use crate::storepaths::{Cache, Lookup, StorePaths}; 7 | use crate::App; 8 | 9 | use anyhow::{Context, Result}; 10 | use ignore::{self, DirEntry, WalkParallel, WalkState}; 11 | use std::io::{self, ErrorKind}; 12 | use std::os::unix::fs::MetadataExt; 13 | use std::sync::atomic::{AtomicBool, Ordering}; 14 | use std::sync::mpsc::channel; 15 | use std::sync::Arc; 16 | 17 | #[derive(Clone, Debug)] 18 | struct ProcessingContext { 19 | startdev: u64, 20 | cache: Arc, 21 | scanner: Arc, 22 | stats: StatsTx, 23 | gc: GCRootsTx, 24 | abort: Arc, 25 | } 26 | 27 | impl ProcessingContext { 28 | fn create(app: &App, stats: &mut Statistics, gc: GCRootsTx) -> Result { 29 | Ok(Self { 30 | startdev: app.start_meta()?.dev(), 31 | cache: Arc::new(app.cache()?), 32 | scanner: Arc::new(app.scanner()?), 33 | stats: stats.tx(), 34 | gc, 35 | abort: Arc::new(AtomicBool::new(false)), 36 | }) 37 | } 38 | 39 | /// Scans a single DirEntry. 40 | /// 41 | /// The cache is queried first. 
Results (scanned or cached) are sent to the registry and 42 | /// statistics collector. 43 | fn scan_entry(&self, dent: DirEntry) -> Result { 44 | let mut sp = match self.cache.lookup(dent) { 45 | Lookup::Dir(sp) | Lookup::Hit(sp) => sp, 46 | Lookup::Miss(d) => self.scanner.find_paths(d)?, 47 | }; 48 | if let Some(err) = sp.error() { 49 | if err.is_partial() { 50 | warn!("{}", err); 51 | self.stats.send(StatsMsg::SoftError).unwrap(); 52 | } else { 53 | return Err(err.clone().into()); 54 | } 55 | } 56 | if sp.metadata()?.dev() != self.startdev { 57 | return Ok(WalkState::Skip); 58 | } 59 | self.cache.insert(&mut sp).context(UErr::WalkAbort)?; 60 | self.stats.send(StatsMsg::Scan((&sp).into())).unwrap(); 61 | if !sp.is_empty() { 62 | self.gc.send(sp).unwrap(); 63 | } 64 | Ok(WalkState::Continue) 65 | } 66 | 67 | /// Walks through a directory hierachy and processes each found DirEntry. 68 | fn walk(self, walker: WalkParallel) -> Result> { 69 | walker.run(|| { 70 | let pctx = self.clone(); 71 | Box::new(move |res: Result| { 72 | res.map_err(From::from) 73 | .and_then(|dent| pctx.scan_entry(dent)) 74 | .unwrap_or_else(|err| { 75 | if let Some(UErr::WalkAbort) = err.downcast_ref::() { 76 | error!("Traversal error: {:#}", &err); 77 | pctx.abort.store(true, Ordering::SeqCst); 78 | return WalkState::Quit; 79 | } else if let Some(UErr::FiletypeUnknown) = err.downcast_ref::() { 80 | // ignore & continue 81 | return WalkState::Continue; 82 | } else if let Some(e) = err.downcast_ref::() { 83 | error!("Traversal failure: {:#}", &e); 84 | pctx.abort.store(true, Ordering::SeqCst); 85 | return WalkState::Quit; 86 | } else if let Some(e) = err.downcast_ref::() { 87 | if e.kind() == ErrorKind::PermissionDenied { 88 | error!("I/O error: {:#}", &err); 89 | pctx.abort.store(true, Ordering::SeqCst); 90 | return WalkState::Quit; 91 | } 92 | } 93 | warn!("{:#}", &err); 94 | pctx.stats.send(StatsMsg::SoftError).unwrap(); 95 | WalkState::Continue 96 | }) 97 | }) 98 | }); 99 | if 
self.abort.load(Ordering::SeqCst) { 100 | Err(UErr::WalkAbort.into()) 101 | } else { 102 | Ok(self.cache) 103 | } 104 | } 105 | } 106 | 107 | /// Creates threads, starts parallel scanning and collects results. 108 | pub fn spawn_threads(app: &App, gcroots: &mut dyn Register) -> Result { 109 | let mut stats = app.statistics(); 110 | let (gc_tx, gc_rx) = channel::(); 111 | let mut cache = crossbeam::scope(|sc| -> Result> { 112 | let pctx = ProcessingContext::create(app, &mut stats, gc_tx)?; 113 | let walker = app.walker()?.build_parallel(); 114 | info!("{}: Scouting {}", crate_name!(), p2s(&app.opt.startdir)); 115 | let walk_hdl = sc.spawn(|_| pctx.walk(walker)); 116 | sc.spawn(|_| stats.receive_loop()); 117 | gcroots.register_loop(gc_rx); 118 | walk_hdl.join().expect("subthread panic") 119 | }) 120 | .expect("thread panic")?; 121 | if app.register { 122 | gcroots.commit(&app.exectx)?; 123 | // don't touch cache if in no-register mode 124 | Arc::get_mut(&mut cache) 125 | .expect("dangling cache references (all threads terminated?)") 126 | .commit(&app.exectx)?; 127 | cache.log_statistics(); 128 | } 129 | stats.log_summary(&app.opt.startdir); 130 | Ok(stats) 131 | } 132 | 133 | #[cfg(test)] 134 | mod tests { 135 | use super::*; 136 | use crate::registry; 137 | use crate::registry::tests::{fake_gc, FakeGCRoots}; 138 | use crate::tests::{app, assert_eq_vecs, FIXTURES}; 139 | 140 | use ignore::WalkBuilder; 141 | use std::fs; 142 | use std::fs::{create_dir, set_permissions, File, Permissions}; 143 | use std::io::Write; 144 | use std::os::unix::fs::{symlink, PermissionsExt}; 145 | use std::path::{Path, PathBuf}; 146 | use std::sync::mpsc::channel; 147 | use tempfile::TempDir; 148 | use users::mock::{MockUsers, User}; 149 | use users::os::unix::UserExt; 150 | 151 | // helper functions 152 | 153 | fn wfile>(path: P, contents: &str) { 154 | let mut file = File::create(path).unwrap(); 155 | file.write_all(contents.as_bytes()).unwrap(); 156 | } 157 | 158 | /// Walks whatever a 
given WalkBuilder builds and collects path relative to the fixtures dir. 159 | /// Hard errors lead to a panic, partial errors are silently ignored. 160 | pub fn walk2vec(wb: &WalkBuilder, prefix: &Path) -> Vec { 161 | let mut paths = vec![]; 162 | let prefix = prefix.canonicalize().unwrap(); 163 | for r in wb.build() { 164 | if let Ok(dent) = r { 165 | let p = dent.path().strip_prefix(&prefix).unwrap(); 166 | paths.push(p.to_owned()); 167 | } 168 | } 169 | paths.sort(); 170 | paths 171 | } 172 | 173 | struct TestDir { 174 | temp: TempDir, 175 | } 176 | 177 | /// Create and remove directory for running tests. Provides an easy way to execute setup code. 178 | impl TestDir { 179 | fn new(setup: F) -> Self 180 | where 181 | F: FnOnce(&Path), 182 | { 183 | let temp = TempDir::new().unwrap(); 184 | setup(&temp.path()); 185 | Self { temp } 186 | } 187 | 188 | fn path(&self) -> &Path { 189 | self.temp.path() 190 | } 191 | } 192 | 193 | impl Drop for TestDir { 194 | /// Set read/exec bits everywhere -- else TempDir's cleanup might fail 195 | fn drop(&mut self) { 196 | for entry in fs::read_dir(self.temp.path()).unwrap() { 197 | if let Ok(f) = entry { 198 | set_permissions(f.path(), Permissions::from_mode(0o755)).ok(); 199 | } 200 | } 201 | } 202 | } 203 | 204 | #[test] 205 | fn walk_fixture_dir1() { 206 | let mut gcroots = fake_gc(); 207 | let stats = spawn_threads(&app("dir1"), &mut gcroots).unwrap(); 208 | assert_eq_vecs( 209 | gcroots.registered, 210 | |s| s.to_owned(), 211 | &[ 212 | "dir1/duplicated|010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5", 213 | "dir1/notignored|00n9gkswhqdgbhgs7lnz2ckqxphavjr8-ChasingBottoms-1.3.1.2.drv", 214 | "dir1/notignored|00y6xgsdpjx3fyz4v7k5lwivi28yqd9f-initrd-fsinfo.drv", 215 | "dir1/proto-http.la|9w3ci6fskmz3nw27fb68hybfa5v1r33f-libidn-1.33", 216 | "dir1/proto-http.la|knvydciispmr4nr2rxg0iyyff3n1v4ax-gcc-6.2.0-lib", 217 | "dir1/script.zip|9v78r3afqy9xn9zwdj9wfys6sk3vc01d-coreutils-8.31", 218 | 
"dir1/six.py|1b4i3gm31j1ipfbx1v9a3hhgmp2wvyyw-python2.7-six-1.9.0", 219 | ], 220 | ); 221 | assert_eq!(stats.softerrors, 0); 222 | } 223 | 224 | #[test] 225 | fn harderror_on_unreadable_file() { 226 | let t = TestDir::new(|p| { 227 | let f = p.join("unreadable_file"); 228 | wfile(&f, "/nix/store/dxscwf37hgq0xafs54h0c8xx47vg6d5g-n"); 229 | set_permissions(&f, Permissions::from_mode(0o000)).unwrap(); 230 | }); 231 | assert!(spawn_threads(&app(t.path()), &mut FakeGCRoots::new(t.path())).is_err()); 232 | } 233 | 234 | #[test] 235 | fn harderror_on_unreadable_dir() { 236 | let t = TestDir::new(|p| { 237 | let d = p.join("unreadable_dir"); 238 | create_dir(&d).unwrap(); 239 | wfile( 240 | &d.join("file3"), 241 | "/nix/store/5hg176hhc19mg8vm2rg3lv2j3vlj166b-m", 242 | ); 243 | set_permissions(&d, Permissions::from_mode(0o111)).unwrap(); 244 | }); 245 | assert!(spawn_threads(&app(t.path()), &mut FakeGCRoots::new(t.path())).is_err()); 246 | } 247 | 248 | #[test] 249 | fn harderror_on_traversable_dir() { 250 | let t = TestDir::new(|p| { 251 | let d = p.join("untraversable_dir"); 252 | create_dir(&d).unwrap(); 253 | set_permissions(&d, Permissions::from_mode(0o000)).unwrap(); 254 | }); 255 | assert!(spawn_threads(&app(t.path()), &mut FakeGCRoots::new(t.path())).is_err()); 256 | } 257 | 258 | #[test] 259 | fn ignore_dangling_link() { 260 | let t = TestDir::new(|p| { 261 | symlink(p.join("no/where"), p.join("symlink")).unwrap(); 262 | }); 263 | let stats = spawn_threads(&app(t.path()), &mut FakeGCRoots::new(t.path())).unwrap(); 264 | assert_eq!(stats.softerrors, 0); 265 | } 266 | 267 | #[test] 268 | fn softfail_on_broken_zip_archive() { 269 | let t = TestDir::new(|p| { 270 | fs::write( 271 | p.join("broken.zip"), 272 | &fs::read(&*FIXTURES.join("dir1/script.zip")).unwrap()[..200], 273 | ) 274 | .unwrap() 275 | }); 276 | let stats = spawn_threads(&app(t.path()), &mut FakeGCRoots::new(t.path())).unwrap(); 277 | assert_eq!(stats.softerrors, 1); 278 | } 279 | 280 | #[test] 281 | fn 
walk_infiniteloop() { 282 | let t = TempDir::new().unwrap(); 283 | let p = t.path(); 284 | create_dir(p.join("dir1")).unwrap(); 285 | create_dir(p.join("dir2")).unwrap(); 286 | symlink("../dir2/file2", p.join("dir1/file1")).unwrap(); 287 | symlink("../dir1/file1", p.join("dir2/file2")).unwrap(); 288 | symlink(".", p.join("recursive")).unwrap(); 289 | let mut gcroots = registry::tests::FakeGCRoots::new(p); 290 | let stats = spawn_threads(&app(p), &mut gcroots).unwrap(); 291 | assert_eq!(gcroots.registered.len(), 0); 292 | assert_eq!(stats.softerrors, 0); 293 | } 294 | 295 | #[test] 296 | fn should_not_cross_devices() { 297 | let app = app("dir1"); 298 | let (tx, _) = channel::(); 299 | let mut pctx = ProcessingContext::create(&app, &mut app.statistics(), tx).unwrap(); 300 | pctx.startdev = 0; 301 | let dent = app.walker().unwrap().build().next().unwrap().unwrap(); 302 | assert_eq!(WalkState::Skip, pctx.scan_entry(dent).unwrap()); 303 | } 304 | 305 | #[test] 306 | fn walk_should_obey_exclude() { 307 | let mut app = app("."); 308 | app.overrides = vec![ 309 | "!dir1".to_owned(), 310 | "!lftp*".to_owned(), 311 | "!cache*".to_owned(), 312 | ]; 313 | assert_eq!( 314 | vec![ 315 | "", 316 | "dir2", 317 | "dir2/ignored", 318 | "dir2/link", 319 | "miniegg-1-py3.5.egg", 320 | ] 321 | .into_iter() 322 | .map(PathBuf::from) 323 | .collect::>(), 324 | walk2vec(&app.walker().unwrap(), &*FIXTURES) 325 | ); 326 | } 327 | 328 | #[test] 329 | fn walk_should_obey_excludefile() { 330 | let t = TempDir::new().unwrap(); 331 | let p = t.path(); 332 | 333 | let mut users = MockUsers::with_current_uid(100); 334 | users.add_user(User::new(100, "johndoe", 100).with_home_dir(&*p.to_string_lossy())); 335 | let app = app(p); 336 | 337 | wfile(p.join(".userscan-ignore"), "file2\n*.jpg\ndata*\n"); 338 | for f in vec!["file1", "file2", "pic.jpg", "data.json"] { 339 | File::create(p.join(f)).unwrap(); 340 | } 341 | 342 | let walker = app 343 | .walker() 344 | .and_then(|wb| 
crate::add_dotexclude(wb, &users)) 345 | .unwrap(); 346 | assert_eq!( 347 | vec!["", ".userscan-ignore", "file1"] 348 | .into_iter() 349 | .map(PathBuf::from) 350 | .collect::>(), 351 | walk2vec(&walker, p) 352 | ); 353 | } 354 | } 355 | -------------------------------------------------------------------------------- /userscan.1.rst: -------------------------------------------------------------------------------- 1 | :title: fc-userscan 2 | :subtitle: Scans directories and registers for Nix store references 3 | :manual_section: 1 4 | :copyright: Flying Circus Internet Operations GmbH and contributors 5 | :author: Christian Kauhaus 6 | :version: @version@ 7 | 8 | SYNOPSIS 9 | ======== 10 | 11 | **fc-userscan** [*OPTIONS*] *STARTDIR* 12 | 13 | 14 | DESCRIPTION 15 | =========== 16 | 17 | Scans directories recursively for files containing references to the Nix store 18 | (i.e., manually compiled programs). Found references are registered as Nix 19 | garbage collector roots to protect referenced derivations from garbage 20 | collection. 21 | 22 | 23 | OPTIONS 24 | ======= 25 | 26 | **--cache**, **-c** *FILE* 27 | Preserves scan results between runs to avoid re-scanning unchanged files. 28 | For each file, the ctime inode attribute is used to decide whether it has 29 | been changed or not. 30 | 31 | **--color**, **-C** [ **always** | **never** | **auto** ] 32 | Turns on funky colorful output. If set to **auto**, color is on only if run 33 | in a terminal. 34 | 35 | **--debug**, **-d** 36 | Shows every file opened and lots of other stuff. Implies **--verbose**. 37 | 38 | **--exclude**, **-e** *GLOB* 39 | Don't scan files matching *GLOB*. Note that matching directories are 40 | completely left out so that contained files skipped even when they are 41 | matched by a subsequent **--include** option. Exclude globs should be given 42 | in gitignore(5) format. This option may be given multiple times. 
43 | 44 | **--exclude-from**, **-E** *FILE* 45 | Like **--exclude**, but reads globs from *FILE*. Note that globs can be 46 | inverted (i.e., explicit include) by prefixing them with an exclamation mark 47 | (!). The format is further described in gitignore(5). 48 | 49 | **--help**, **-h** 50 | Prints verbose or brief options overview. 51 | 52 | **--include**, **-i** *GLOB* 53 | Scans only files matching *GLOB*. Handy for restricting scans to a few types 54 | of files or to override broader exclude globs given before. 55 | 56 | **--cache-limit**, **-L** *N* 57 | Limits cache capacity to *N* inodes. fc-userscan will abort when trying to 58 | save more than *N* entries in the cache. This will effectively cap memory and 59 | disk usage. 60 | 61 | **--list**, **-l** 62 | Only prints found store references while scanning, but does not register 63 | them. Can be used in conjunction with **--register**. 64 | 65 | **--oneline**, **-1** 66 | When in list mode, each file is printed together with its references on the 67 | same line. Automatic post-processing may be easier using this format. 68 | If not given, the file and its references are printed on separate lines. 69 | 70 | **--pause-load**, **-p** *L* 71 | Pauses scanning if the current load1 goes over load15+L. The baseline is 72 | determined at program startup. If there are multiple CPUs present, the 73 | increase is granted per CPU. Use **0.0** to disable. 74 | 75 | **--quickcheck**, **-q** *SIZE* 76 | Improves performance for large files: if not a single Nix store reference is 77 | present in the first *SIZE* kilobytes of a file, the rest of the file is 78 | skipped. There are usually store references present somewhere near the start 79 | of a file if a file contains references at all. 80 | 81 | **--register**, **-r** 82 | Registers GC roots for newly found references and removes GC roots for 83 | references that are no longer valid. This is the default unless **--list** 84 | is given.
85 | 86 | **--statistics**, **--stats**, **-S** 87 | Prints scanned files and read bytes per file type at the end of the run. 88 | This may help to fine-tune exclude lists. 89 | 90 | **--unzip**, **-z** *GLOB[,GLOB...]* 91 | Unpacks files matching GLOB as ZIP archives and scans all contained 92 | files. Accepts a comma-separated list of glob patterns. 93 | 94 | **--verbose**, **-v** 95 | Generally increases output level. Prints newly created/deleted GC roots, 96 | processed totals, and cache hit rate, for example. 97 | 98 | **--version**, **-V** 99 | Prints program version and exits. 100 | 101 | 102 | EXIT STATUS 103 | =========== 104 | 105 | **2** if the program has been terminated due to hard errors like failures to 106 | create GC store references or problems while reading a cache file. 107 | 108 | **1** if there were less critical problems like I/O errors while reading 109 | individual files or insufficient permissions. 110 | 111 | **0** if no problems were encountered. 112 | 113 | 114 | FILES 115 | ===== 116 | 117 | ~/.userscan-ignore 118 | Additional exclude/include globs are read from this file if it exists. The 119 | format of this file is the same as in gitignore(5). 120 | 121 | /nix/var/nix/gcroots/profiles/per-user 122 | Garbage collection roots generated by fc-userscan are created in 123 | subdirectories of the per-user GC dir. For example, if fc-userscan is run by 124 | user joe, Nix store references found in **/lib/rc** are registered in 125 | **/nix/var/nix/gcroots/profiles/per-user/joe/lib/rc**. 126 | 127 | /nix/store 128 | While scanning for Nix store references, the standard Nix store prefix is 129 | hard-coded for performance reasons. Alternative Nix store locations are not 130 | supported. 131 | 132 | 133 | NOTES 134 | ===== 135 | 136 | Partial reading of cache files is not supported. This means that once a cache 137 | file has been truncated or otherwise damaged, fc-userscan will bail out while 138 | trying to read the cache file.
Delete the cache file in this case. It will be 139 | created automatically on the next run. 140 | 141 | 142 | BUGS 143 | ==== 144 | 145 | Unpacking of large ZIP archives or those containing very many files is unduly 146 | slow. Try to avoid **-z** if possible. 147 | 148 | 149 | EXAMPLES 150 | ======== 151 | 152 | Scan files and register references starting at the current directory. Give a 153 | high-level summary and a breakdown per extension: 154 | 155 | **fc-userscan -v -S .** 156 | 157 | List found references, but don't register them: 158 | 159 | **fc-userscan -l .** 160 | 161 | Scan home dir, using a cache and an exclude file: 162 | 163 | **fc-userscan -c ~/.cache/userscan -E /etc/userscan/exclude ~** 164 | 165 | 166 | SEE ALSO 167 | ======== 168 | 169 | nix-collect-garbage(1), nix-store(1), gitignore(5) 170 | --------------------------------------------------------------------------------