├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-GPL-3.0
├── LICENSE-MIT
├── README.md
├── clippy.toml
├── dwarfs-enc
│   ├── Cargo.toml
│   ├── LICENSE-GPL-3.0
│   ├── README.md
│   ├── examples
│   │   └── mkdwarfs.rs
│   ├── src
│   │   ├── chunker.rs
│   │   ├── error.rs
│   │   ├── lib.rs
│   │   ├── metadata.rs
│   │   ├── ordered_parallel.rs
│   │   └── section.rs
│   └── tests
│       └── basic.rs
├── dwarfs-test
│   ├── Cargo.toml
│   ├── LICENSE-APACHE
│   ├── LICENSE-MIT
│   ├── src
│   │   ├── lib.rs
│   │   ├── main.rs
│   │   ├── mtree.rs
│   │   └── traverse.rs
│   └── tests
│       ├── basic.rs
│       └── large.rs
├── dwarfs
│   ├── CHANGELOG.md
│   ├── Cargo.toml
│   ├── LICENSE-APACHE
│   ├── LICENSE-MIT
│   ├── README.md
│   └── src
│       ├── archive.rs
│       ├── fsst.rs
│       ├── lib.rs
│       ├── metadata.rs
│       ├── metadata
│       │   ├── de_frozen.rs
│       │   ├── de_thrift.rs
│       │   ├── ser_frozen.rs
│       │   ├── ser_thrift.rs
│       │   └── tests.rs
│       └── section.rs
├── flake.lock
├── flake.nix
└── typos.toml
/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | result 3 | result-* 4 | *.dwarfs 5 | 6 | perf*.data* 7 | flamegraph*.svg 8 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "allocator-api2" 16 | version = "0.2.21" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" 19 | 20 | [[package]] 21 | name = "anstream" 22 | version = "0.6.18" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" 25 | dependencies = [ 26 | "anstyle", 27 | "anstyle-parse", 28 | "anstyle-query", 29 | "anstyle-wincon", 30 | "colorchoice", 31 | "is_terminal_polyfill", 32 | "utf8parse", 33 | ] 34 | 35 | [[package]] 36 | name = "anstyle" 37 | version = "1.0.10" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" 40 | 41 | [[package]] 42 | name = "anstyle-parse" 43 | version = "0.2.6" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" 46 | dependencies = [ 47 | "utf8parse", 48 | ] 49 | 50 | [[package]] 51 | name = "anstyle-query" 52 | version = "1.1.2" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" 55 | dependencies = [ 56 | "windows-sys", 57 | ] 58 | 59 | [[package]] 60 | name = "anstyle-wincon" 61 | version = "3.0.7" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" 64 | dependencies = [ 65 | "anstyle", 66 | "once_cell", 67 | "windows-sys", 68 | ] 69 | 70 | [[package]] 71 | name = "bitflags" 72 | version = "2.9.0" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 75 | 76 |
[[package]] 77 | name = "block-buffer" 78 | version = "0.10.4" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 81 | dependencies = [ 82 | "generic-array", 83 | ] 84 | 85 | [[package]] 86 | name = "bstr" 87 | version = "1.12.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" 90 | dependencies = [ 91 | "memchr", 92 | "regex-automata", 93 | "serde", 94 | ] 95 | 96 | [[package]] 97 | name = "bumpalo" 98 | version = "3.18.1" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" 101 | 102 | [[package]] 103 | name = "cc" 104 | version = "1.2.22" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "32db95edf998450acc7881c932f94cd9b05c87b4b2599e8bab064753da4acfd1" 107 | dependencies = [ 108 | "jobserver", 109 | "libc", 110 | "shlex", 111 | ] 112 | 113 | [[package]] 114 | name = "cfg-if" 115 | version = "1.0.0" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 118 | 119 | [[package]] 120 | name = "clap" 121 | version = "4.5.39" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" 124 | dependencies = [ 125 | "clap_builder", 126 | "clap_derive", 127 | ] 128 | 129 | [[package]] 130 | name = "clap_builder" 131 | version = "4.5.39" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" 134 | dependencies = [ 135 | "anstream", 136 | "anstyle", 137 | "clap_lex", 138 | "strsim", 139 | ] 140 | 141 | [[package]] 142 | name = "clap_derive" 143 | version = "4.5.32" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" 146 | dependencies = [ 147 | "heck", 148 | "proc-macro2", 149 | "quote", 150 | "syn", 151 | ] 152 | 153 | [[package]] 154 | name = "clap_lex" 155 | version = "0.7.4" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" 158 | 159 | [[package]] 160 | name = "colorchoice" 161 | version = "1.0.3" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" 164 | 165 | [[package]] 166 | name = "console" 167 | version = "0.15.11" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" 170 | dependencies = [ 171 | "encode_unicode", 172 | "libc", 173 | "once_cell", 174 | "unicode-width", 175 | "windows-sys", 176 | ] 177 | 178 | [[package]] 179 | name = "cpufeatures" 180 | version = "0.2.17" 181 | source = "registry+https://github.com/rust-lang/crates.io-index" 182 | checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" 183 | dependencies = [ 184 | "libc", 185 | ] 186 | 187 | [[package]] 188 | name = "crossbeam-channel" 189 | version = "0.5.15" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = 
"82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" 192 | dependencies = [ 193 | "crossbeam-utils", 194 | ] 195 | 196 | [[package]] 197 | name = "crossbeam-utils" 198 | version = "0.8.21" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 201 | 202 | [[package]] 203 | name = "crypto-common" 204 | version = "0.1.6" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 207 | dependencies = [ 208 | "generic-array", 209 | "typenum", 210 | ] 211 | 212 | [[package]] 213 | name = "digest" 214 | version = "0.10.7" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 217 | dependencies = [ 218 | "block-buffer", 219 | "crypto-common", 220 | ] 221 | 222 | [[package]] 223 | name = "dwarfs" 224 | version = "0.2.1" 225 | dependencies = [ 226 | "bstr", 227 | "indexmap", 228 | "liblzma", 229 | "log", 230 | "lru", 231 | "lz4", 232 | "measure_time", 233 | "positioned-io", 234 | "serde", 235 | "sha2", 236 | "xxhash-rust", 237 | "zerocopy", 238 | "zstd-safe", 239 | ] 240 | 241 | [[package]] 242 | name = "dwarfs-enc" 243 | version = "0.1.0" 244 | dependencies = [ 245 | "clap", 246 | "crossbeam-channel", 247 | "dwarfs", 248 | "indexmap", 249 | "indicatif", 250 | "liblzma", 251 | "rustic_cdc", 252 | "rustix", 253 | "serde", 254 | "sha2", 255 | "zerocopy", 256 | "zstd-safe", 257 | ] 258 | 259 | [[package]] 260 | name = "dwarfs-test" 261 | version = "0.0.0" 262 | dependencies = [ 263 | "dwarfs", 264 | "env_logger", 265 | "hex", 266 | "rustix", 267 | "sha2", 268 | "tempfile", 269 | "xshell", 270 | ] 271 | 272 | [[package]] 273 | name = "encode_unicode" 274 | version = "1.0.0" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" 277 | 278 | [[package]] 279 | name = "env_filter" 280 | version = "0.1.3" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 283 | dependencies = [ 284 | "log", 285 | "regex", 286 | ] 287 | 288 | [[package]] 289 | name = "env_logger" 290 | version = "0.11.8" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 293 | dependencies = [ 294 | "anstream", 295 | "anstyle", 296 | "env_filter", 297 | "jiff", 298 | "log", 299 | ] 300 | 301 | [[package]] 302 | name = "equivalent" 303 | version = "1.0.2" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 306 | 307 | [[package]] 308 | name = "errno" 309 | version = "0.3.12" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" 312 | dependencies = [ 313 | "libc", 314 | "windows-sys", 315 | ] 316 | 317 | [[package]] 318 | name = "fastrand" 319 | version = "2.3.0" 320 | source = "registry+https://github.com/rust-lang/crates.io-index" 321 | checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 322 | 323 | [[package]] 324 | name = "foldhash" 325 | version = "0.1.5" 326 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 327 | checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 328 | 329 | [[package]] 330 | name = "generic-array" 331 | version = "0.14.7" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" 334 | dependencies = [ 335 | "typenum", 336 | "version_check", 337 | ] 338 | 339 | [[package]] 340 | name = "getrandom" 341 | version = "0.3.3" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 344 | dependencies = [ 345 | "cfg-if", 346 | "libc", 347 | "r-efi", 348 | "wasi", 349 | ] 350 | 351 | [[package]] 352 | name = "hashbrown" 353 | version = "0.15.3" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" 356 | dependencies = [ 357 | "allocator-api2", 358 | "equivalent", 359 | "foldhash", 360 | ] 361 | 362 | [[package]] 363 | name = "heck" 364 | version = "0.5.0" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 367 | 368 | [[package]] 369 | name = "hex" 370 | version = "0.4.3" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" 373 | 374 | [[package]] 375 | name = "indexmap" 376 | version = "2.9.0" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" 379 | dependencies = [ 380 | "equivalent", 381 | "hashbrown", 382 | ] 383 | 384 | [[package]] 385 | name = "indicatif" 386 | version = "0.17.11" 387 | source = "registry+https://github.com/rust-lang/crates.io-index" 388 | checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" 389 | dependencies = [ 390 | "console", 391 | "number_prefix", 392 | "portable-atomic", 393 | "unicode-width", 394 | "web-time", 395 | ] 396 | 397 | [[package]] 398 | name = "is_terminal_polyfill" 399 | version = "1.70.1" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 402 | 403 | [[package]] 404 | name = "jiff" 405 | version = "0.2.13" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "f02000660d30638906021176af16b17498bd0d12813dbfe7b276d8bc7f3c0806" 408 | dependencies = [ 409 | "jiff-static", 410 | "log", 411 | "portable-atomic", 412 | "portable-atomic-util", 413 | "serde", 414 | ] 415 | 416 | [[package]] 417 | name = "jiff-static" 418 | version = "0.2.13" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "f3c30758ddd7188629c6713fc45d1188af4f44c90582311d0c8d8c9907f60c48" 421 | dependencies = [ 422 | "proc-macro2", 423 | "quote", 424 | "syn", 425 | ] 426 | 427 | [[package]] 428 | name = "jobserver" 429 | version = "0.1.33" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" 432 | dependencies = [ 433 | "getrandom", 434 | "libc", 435 | ] 436 | 437 | [[package]] 438 | name = "js-sys" 439 | version = "0.3.77" 440 | source = "registry+https://github.com/rust-lang/crates.io-index" 441 | checksum = 
"1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" 442 | dependencies = [ 443 | "once_cell", 444 | "wasm-bindgen", 445 | ] 446 | 447 | [[package]] 448 | name = "libc" 449 | version = "0.2.172" 450 | source = "registry+https://github.com/rust-lang/crates.io-index" 451 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 452 | 453 | [[package]] 454 | name = "liblzma" 455 | version = "0.4.1" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "66352d7a8ac12d4877b6e6ea5a9b7650ee094257dc40889955bea5bc5b08c1d0" 458 | dependencies = [ 459 | "liblzma-sys", 460 | ] 461 | 462 | [[package]] 463 | name = "liblzma-sys" 464 | version = "0.4.3" 465 | source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "5839bad90c3cc2e0b8c4ed8296b80e86040240f81d46b9c0e9bc8dd51ddd3af1" 467 | dependencies = [ 468 | "cc", 469 | "libc", 470 | "pkg-config", 471 | ] 472 | 473 | [[package]] 474 | name = "linux-raw-sys" 475 | version = "0.9.4" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" 478 | 479 | [[package]] 480 | name = "log" 481 | version = "0.4.27" 482 | source = "registry+https://github.com/rust-lang/crates.io-index" 483 | checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" 484 | 485 | [[package]] 486 | name = "lru" 487 | version = "0.14.0" 488 | source = "registry+https://github.com/rust-lang/crates.io-index" 489 | checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" 490 | dependencies = [ 491 | "hashbrown", 492 | ] 493 | 494 | [[package]] 495 | name = "lz4" 496 | version = "1.28.1" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" 499 | dependencies = [ 500 | "lz4-sys", 501 | ] 502 | 503 | [[package]] 504 | name = "lz4-sys" 505 | version = "1.11.1+lz4-1.10.0" 506 | source = "registry+https://github.com/rust-lang/crates.io-index" 507 | checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" 508 | dependencies = [ 509 | "cc", 510 | "libc", 511 | ] 512 | 513 | [[package]] 514 | name = "measure_time" 515 | version = "0.9.0" 516 | source = "registry+https://github.com/rust-lang/crates.io-index" 517 | checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" 518 | dependencies = [ 519 | "log", 520 | ] 521 | 522 | [[package]] 523 | name = "memchr" 524 | version = "2.7.4" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 527 | 528 | [[package]] 529 | name = "number_prefix" 530 | version = "0.4.0" 531 | source = "registry+https://github.com/rust-lang/crates.io-index" 532 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 533 | 534 | [[package]] 535 | name = "once_cell" 536 | version = "1.21.3" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 539 | 540 | [[package]] 541 | name = "pkg-config" 542 | version = "0.3.32" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 545 | 546 | [[package]] 547 | name = "portable-atomic" 548 | version = "1.11.0" 549 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" 551 | 552 | [[package]] 553 | name = "portable-atomic-util" 554 | version = "0.2.4" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 557 | dependencies = [ 558 | "portable-atomic", 559 | ] 560 | 561 | [[package]] 562 | name = "positioned-io" 563 | version = "0.3.4" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "e8078ce4d22da5e8f57324d985cc9befe40c49ab0507a192d6be9e59584495c9" 566 | dependencies = [ 567 | "libc", 568 | "winapi", 569 | ] 570 | 571 | [[package]] 572 | name = "proc-macro2" 573 | version = "1.0.95" 574 | source = "registry+https://github.com/rust-lang/crates.io-index" 575 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 576 | dependencies = [ 577 | "unicode-ident", 578 | ] 579 | 580 | [[package]] 581 | name = "quote" 582 | version = "1.0.40" 583 | source = "registry+https://github.com/rust-lang/crates.io-index" 584 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 585 | dependencies = [ 586 | "proc-macro2", 587 | ] 588 | 589 | [[package]] 590 | name = "r-efi" 591 | version = "5.2.0" 592 | source = "registry+https://github.com/rust-lang/crates.io-index" 593 | checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" 594 | 595 | [[package]] 596 | name = "regex" 597 | version = "1.11.1" 598 | source = "registry+https://github.com/rust-lang/crates.io-index" 599 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 600 | dependencies = [ 601 | "aho-corasick", 602 | "memchr", 603 | "regex-automata", 604 | "regex-syntax", 605 | ] 606 | 607 | [[package]] 608 | name = "regex-automata" 609 | version = "0.4.9" 610 | source = "registry+https://github.com/rust-lang/crates.io-index" 611 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 612 | dependencies = [ 613 | "aho-corasick", 614 | "memchr", 615 | "regex-syntax", 616 | ] 617 | 618 | [[package]] 619 | name = "regex-syntax" 620 | version = "0.8.5" 621 | source = "registry+https://github.com/rust-lang/crates.io-index" 622 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 623 | 624 | [[package]] 625 | name = "rustic_cdc" 626 | version = "0.3.1" 627 | source = "registry+https://github.com/rust-lang/crates.io-index" 628 | checksum = "fbcebf2228827bc4b61cb54dfd84cf43aacf06ca2dfe4c014b136a0e32b876e2" 629 | 630 | [[package]] 631 | name = "rustix" 632 | version = "1.0.7" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" 635 | dependencies = [ 636 | "bitflags", 637 | "errno", 638 | "libc", 639 | "linux-raw-sys", 640 | "windows-sys", 641 | ] 642 | 643 | [[package]] 644 | name = "serde" 645 | version = "1.0.219" 646 | source = "registry+https://github.com/rust-lang/crates.io-index" 647 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 648 | dependencies = [ 649 | "serde_derive", 650 | ] 651 | 652 | [[package]] 653 | name = "serde_derive" 654 | version = "1.0.219" 655 | source = "registry+https://github.com/rust-lang/crates.io-index" 656 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 657 | dependencies = [ 658 | "proc-macro2", 659 | 
"quote", 660 | "syn", 661 | ] 662 | 663 | [[package]] 664 | name = "sha2" 665 | version = "0.10.9" 666 | source = "registry+https://github.com/rust-lang/crates.io-index" 667 | checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" 668 | dependencies = [ 669 | "cfg-if", 670 | "cpufeatures", 671 | "digest", 672 | ] 673 | 674 | [[package]] 675 | name = "shlex" 676 | version = "1.3.0" 677 | source = "registry+https://github.com/rust-lang/crates.io-index" 678 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 679 | 680 | [[package]] 681 | name = "strsim" 682 | version = "0.11.1" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 685 | 686 | [[package]] 687 | name = "syn" 688 | version = "2.0.101" 689 | source = "registry+https://github.com/rust-lang/crates.io-index" 690 | checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 691 | dependencies = [ 692 | "proc-macro2", 693 | "quote", 694 | "unicode-ident", 695 | ] 696 | 697 | [[package]] 698 | name = "tempfile" 699 | version = "3.20.0" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" 702 | dependencies = [ 703 | "fastrand", 704 | "getrandom", 705 | "once_cell", 706 | "rustix", 707 | "windows-sys", 708 | ] 709 | 710 | [[package]] 711 | name = "typenum" 712 | version = "1.18.0" 713 | source = "registry+https://github.com/rust-lang/crates.io-index" 714 | checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" 715 | 716 | [[package]] 717 | name = "unicode-ident" 718 | version = "1.0.18" 719 | source = "registry+https://github.com/rust-lang/crates.io-index" 720 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 721 | 722 | [[package]] 723 | name = "unicode-width" 724 | version = "0.2.0" 725 | source = "registry+https://github.com/rust-lang/crates.io-index" 726 | checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" 727 | 728 | [[package]] 729 | name = "utf8parse" 730 | version = "0.2.2" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 733 | 734 | [[package]] 735 | name = "version_check" 736 | version = "0.9.5" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 739 | 740 | [[package]] 741 | name = "wasi" 742 | version = "0.14.2+wasi-0.2.4" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" 745 | dependencies = [ 746 | "wit-bindgen-rt", 747 | ] 748 | 749 | [[package]] 750 | name = "wasm-bindgen" 751 | version = "0.2.100" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" 754 | dependencies = [ 755 | "cfg-if", 756 | "once_cell", 757 | "wasm-bindgen-macro", 758 | ] 759 | 760 | [[package]] 761 | name = "wasm-bindgen-backend" 762 | version = "0.2.100" 763 | source = "registry+https://github.com/rust-lang/crates.io-index" 764 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" 765 | dependencies = [ 766 | "bumpalo", 767 | "log", 768 | "proc-macro2", 769 | "quote", 
770 | "syn", 771 | "wasm-bindgen-shared", 772 | ] 773 | 774 | [[package]] 775 | name = "wasm-bindgen-macro" 776 | version = "0.2.100" 777 | source = "registry+https://github.com/rust-lang/crates.io-index" 778 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 779 | dependencies = [ 780 | "quote", 781 | "wasm-bindgen-macro-support", 782 | ] 783 | 784 | [[package]] 785 | name = "wasm-bindgen-macro-support" 786 | version = "0.2.100" 787 | source = "registry+https://github.com/rust-lang/crates.io-index" 788 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" 789 | dependencies = [ 790 | "proc-macro2", 791 | "quote", 792 | "syn", 793 | "wasm-bindgen-backend", 794 | "wasm-bindgen-shared", 795 | ] 796 | 797 | [[package]] 798 | name = "wasm-bindgen-shared" 799 | version = "0.2.100" 800 | source = "registry+https://github.com/rust-lang/crates.io-index" 801 | checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 802 | dependencies = [ 803 | "unicode-ident", 804 | ] 805 | 806 | [[package]] 807 | name = "web-time" 808 | version = "1.1.0" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" 811 | dependencies = [ 812 | "js-sys", 813 | "wasm-bindgen", 814 | ] 815 | 816 | [[package]] 817 | name = "winapi" 818 | version = "0.3.9" 819 | source = "registry+https://github.com/rust-lang/crates.io-index" 820 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 821 | dependencies = [ 822 | "winapi-i686-pc-windows-gnu", 823 | "winapi-x86_64-pc-windows-gnu", 824 | ] 825 | 826 | [[package]] 827 | name = "winapi-i686-pc-windows-gnu" 828 | version = "0.4.0" 829 | source = "registry+https://github.com/rust-lang/crates.io-index" 830 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 831 | 832 | [[package]] 833 | name = "winapi-x86_64-pc-windows-gnu" 834 | version = "0.4.0" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 837 | 838 | [[package]] 839 | name = "windows-sys" 840 | version = "0.59.0" 841 | source = "registry+https://github.com/rust-lang/crates.io-index" 842 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 843 | dependencies = [ 844 | "windows-targets", 845 | ] 846 | 847 | [[package]] 848 | name = "windows-targets" 849 | version = "0.52.6" 850 | source = "registry+https://github.com/rust-lang/crates.io-index" 851 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 852 | dependencies = [ 853 | "windows_aarch64_gnullvm", 854 | "windows_aarch64_msvc", 855 | "windows_i686_gnu", 856 | "windows_i686_gnullvm", 857 | "windows_i686_msvc", 858 | "windows_x86_64_gnu", 859 | "windows_x86_64_gnullvm", 860 | "windows_x86_64_msvc", 861 | ] 862 | 863 | [[package]] 864 | name = "windows_aarch64_gnullvm" 865 | version = "0.52.6" 866 | source = "registry+https://github.com/rust-lang/crates.io-index" 867 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 868 | 869 | [[package]] 870 | name = "windows_aarch64_msvc" 871 | version = "0.52.6" 872 | source = "registry+https://github.com/rust-lang/crates.io-index" 873 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 874 | 875 | [[package]] 876 | name = "windows_i686_gnu" 877 | version = "0.52.6" 878 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 879 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 880 | 881 | [[package]] 882 | name = "windows_i686_gnullvm" 883 | version = "0.52.6" 884 | source = "registry+https://github.com/rust-lang/crates.io-index" 885 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 886 | 887 | [[package]] 888 | name = "windows_i686_msvc" 889 | version = "0.52.6" 890 | source = "registry+https://github.com/rust-lang/crates.io-index" 891 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 892 | 893 | [[package]] 894 | name = "windows_x86_64_gnu" 895 | version = "0.52.6" 896 | source = "registry+https://github.com/rust-lang/crates.io-index" 897 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 898 | 899 | [[package]] 900 | name = "windows_x86_64_gnullvm" 901 | version = "0.52.6" 902 | source = "registry+https://github.com/rust-lang/crates.io-index" 903 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 904 | 905 | [[package]] 906 | name = "windows_x86_64_msvc" 907 | version = "0.52.6" 908 | source = "registry+https://github.com/rust-lang/crates.io-index" 909 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 910 | 911 | [[package]] 912 | name = "wit-bindgen-rt" 913 | version = "0.39.0" 914 | source = "registry+https://github.com/rust-lang/crates.io-index" 915 | checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" 916 | dependencies = [ 917 | "bitflags", 918 | ] 919 | 920 | [[package]] 921 | name = "xshell" 922 | version = "0.2.7" 923 | source = "registry+https://github.com/rust-lang/crates.io-index" 924 | checksum = "9e7290c623014758632efe00737145b6867b66292c42167f2ec381eb566a373d" 925 | dependencies = [ 926 | "xshell-macros", 927 | ] 928 | 929 | [[package]] 930 | name = "xshell-macros" 931 | version = "0.2.7" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "32ac00cd3f8ec9c1d33fb3e7958a82df6989c42d747bd326c822b1d625283547" 934 | 935 | [[package]] 936 | name = "xxhash-rust" 937 | version = "0.8.15" 938 | source = "registry+https://github.com/rust-lang/crates.io-index" 939 | checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" 940 | 941 | [[package]] 942 | name = "zerocopy" 943 | version = "0.8.25" 944 | source = "registry+https://github.com/rust-lang/crates.io-index" 945 | checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" 946 | dependencies = [ 947 | "zerocopy-derive", 948 | ] 949 | 950 | [[package]] 951 | name = "zerocopy-derive" 952 | version = "0.8.25" 953 | source = "registry+https://github.com/rust-lang/crates.io-index" 954 | checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" 955 | dependencies = [ 956 | "proc-macro2", 957 | "quote", 958 | "syn", 959 | ] 960 | 961 | [[package]] 962 | name = "zstd-safe" 963 | version = "7.2.4" 964 | source = "registry+https://github.com/rust-lang/crates.io-index" 965 | checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" 966 | dependencies = [ 967 | "zstd-sys", 968 | ] 969 | 970 | [[package]] 971 | name = "zstd-sys" 972 | version = "2.0.15+zstd.1.5.7" 973 | source = "registry+https://github.com/rust-lang/crates.io-index" 974 | checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" 975 | dependencies = [ 976 | "cc", 977 | "pkg-config", 978 | ] 979 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "3" 3 | members = [ 4 | "dwarfs", 5 | "dwarfs-test", 6 | "dwarfs-enc", 7 | ] 8 | 9 | [profile.bench] 10 | debug = "line-tables-only" 11 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dwarfs 2 | 3 | [![crates.io](https://img.shields.io/crates/v/dwarfs?label=dwarfs)](https://crates.io/crates/dwarfs) 4 | [![docs.rs](https://img.shields.io/docsrs/dwarfs?label=docs.rs%2Fdwarfs)](https://docs.rs/dwarfs) 5 | [![crates.io](https://img.shields.io/crates/v/dwarfs-enc?label=dwarfs-enc)](https://crates.io/crates/dwarfs-enc) 6 | [![docs.rs](https://img.shields.io/docsrs/dwarfs-enc?label=docs.rs%2Fdwarfs-enc)](https://docs.rs/dwarfs-enc) 7 | 8 | Libraries for reading and writing [DwarFS][dwarfs] archives (aka. DwarFS images), 9 | in pure Rust without `unsafe`. 10 | 11 | #### License 12 | 13 | TL;DR: We mostly follow [upstream][dwarfs]: the package for constructing 14 | DwarFS archives (dwarfs-enc) is GPL-3.0. Other code is "(MIT OR Apache-2.0)". 15 | 16 | Long version: 17 | 18 | All files under directory `dwarfs-enc` are licensed under GNU General Public 19 | License, version 3. Check `./dwarfs-enc/README.md` and `./LICENSE-GPL-3.0` for 20 | details. 21 | 22 | Other files in this repository outside `dwarfs-enc`, including `dwarfs` and 23 | `dwarfs-test` packages, are licensed under Apache 24 | License 2.0 or MIT license at your option. Check `./dwarfs/README.md`, 25 | `./LICENSE-APACHE` and `./LICENSE-MIT` for details. 26 | 27 | [dwarfs]: https://github.com/mhx/dwarfs 28 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | doc-valid-idents = ["DwarFS", ".."] -------------------------------------------------------------------------------- /dwarfs-enc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dwarfs-enc" 3 | version = "0.1.0" 4 | edition = "2024" 5 | license = "GPL-3.0-only" 6 | description = "A library for writing DwarFS archives (aka. 
DwarFS images)" 7 | keywords = ["dwarfs", "archive", "compression"] 8 | categories = ["compression", "filesystem"] 9 | repository = "https://github.com/oxalica/dwarfs-rs" 10 | 11 | [features] 12 | default = ["zstd"] 13 | 14 | zstd = ["dep:zstd-safe"] 15 | lzma = ["dep:liblzma"] 16 | 17 | [dependencies] 18 | crossbeam-channel = "0.5.15" 19 | dwarfs = { version = "0.2.1", path = "../dwarfs", features = ["serialize"] } 20 | indexmap = "2.9.0" 21 | liblzma = { version = "0.4.1", optional = true } 22 | rustic_cdc = "0.3.1" 23 | rustix = { version = "1.0.7", features = ["fs"] } 24 | serde = "1.0.219" 25 | sha2 = "0.10.9" 26 | zerocopy = { version = "0.8.25", features = ["derive", "std"] } 27 | zstd-safe = { version = "7.2.4", default-features = false, optional = true } 28 | 29 | [dev-dependencies] 30 | clap = { version = "4.5.39", features = ["derive"] } 31 | indicatif = "0.17.11" 32 | 33 | [[example]] 34 | name = "mkdwarfs" 35 | required-features = ["zstd", "lzma"] 36 | 37 | [lints.clippy] 38 | dbg-macro = "warn" 39 | todo = "warn" 40 | print-stdout = "warn" 41 | print-stderr = "warn" -------------------------------------------------------------------------------- /dwarfs-enc/LICENSE-GPL-3.0: -------------------------------------------------------------------------------- 1 | ../LICENSE-GPL-3.0 -------------------------------------------------------------------------------- /dwarfs-enc/README.md: -------------------------------------------------------------------------------- 1 | # dwarfs-enc 2 | 3 | [![crates.io](https://img.shields.io/crates/v/dwarfs-enc)](https://crates.io/crates/dwarfs-enc) 4 | [![docs.rs](https://img.shields.io/docsrs/dwarfs-enc)](https://docs.rs/dwarfs-enc) 5 | 6 | A library for writing [DwarFS][dwarfs] archives (aka. DwarFS images), 7 | building on top of [`dwarfs` crate][dwarfs-rs]. 8 | 9 | [dwarfs]: https://github.com/mhx/dwarfs 10 | [dwarfs-rs]: https://crates.io/crates/dwarfs 11 | 12 | #### License 13 | 14 | SPDX-License-Identifier: GPL-3.0-only 15 | 16 | Copyright (C) 2025 Oxalica 17 | 18 | This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3. 19 | 20 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 21 | 22 | You should have received a copy of the GNU General Public License along with this program. If not, see . 
23 | -------------------------------------------------------------------------------- /dwarfs-enc/examples/mkdwarfs.rs: -------------------------------------------------------------------------------- 1 | #![expect(clippy::print_stderr, reason = "allowed in examples")] 2 | use std::{ 3 | borrow::Cow, 4 | fs, 5 | path::{Path, PathBuf}, 6 | time::Instant, 7 | }; 8 | 9 | use dwarfs_enc::{ 10 | chunker::{self, Chunker}, 11 | metadata::{Builder as MetadataBuilder, InodeMetadata}, 12 | section::{self, CompressParam}, 13 | }; 14 | use indicatif::{HumanBytes, HumanCount, MultiProgress, ProgressBar, ProgressStyle}; 15 | 16 | #[derive(Debug, clap::Parser)] 17 | struct Cli { 18 | #[arg(short, long)] 19 | input: PathBuf, 20 | #[arg(short, long)] 21 | output: PathBuf, 22 | 23 | #[arg(short, long)] 24 | force: bool, 25 | 26 | #[arg(long, conflicts_with = "lzma")] 27 | zstd: Option<i32>, 28 | #[arg(long)] 29 | lzma: Option<u32>, 30 | } 31 | 32 | fn main() -> Result<(), Box<dyn std::error::Error>> { 33 | let cli: Cli = clap::Parser::parse(); 34 | 35 | let inst = Instant::now(); 36 | 37 | let fout = fs::OpenOptions::new() 38 | .write(true) 39 | .create(true) 40 | .truncate(true) 41 | .create_new(!cli.force) 42 | .open(&cli.output)?; 43 | 44 | let root_meta = fs::metadata(&cli.input)?; 45 | let root_meta = InodeMetadata::from(&root_meta); 46 | 47 | let stat = { 48 | let progress = ProgressBar::new_spinner(); 49 | let mut stat = Stats::default(); 50 | traverse_stats(&cli.input, &mut stat, &progress)?; 51 | progress.finish(); 52 | stat 53 | }; 54 | 55 | let compress = match (cli.zstd, cli.lzma) { 56 | (None, None) => CompressParam::None, 57 | (Some(zstd), None) => CompressParam::Zstd(zstd), 58 | (None, Some(lzma)) => CompressParam::Lzma(lzma), 59 | _ => unreachable!(), 60 | }; 61 | eprintln!("using compression: {compress:?}"); 62 | 63 | let pb_in_bytes = ProgressBar::new(stat.total_bytes).with_style( 64 | ProgressStyle::with_template( 65 | "input : {binary_bytes}/{binary_total_bytes} ({binary_bytes_per_sec}) {wide_bar}", 66 | ) 67 | .unwrap(), 68 | ); 69 | let pb_out_bytes = ProgressBar::no_length() 70 | .with_style(ProgressStyle::with_template("output: {binary_bytes} {spinner}").unwrap()); 71 | let fout_pb = pb_out_bytes.wrap_write(&fout); 72 | 73 | let pbs = MultiProgress::new(); 74 | pbs.add(pb_in_bytes.clone()); 75 | pbs.add(pb_out_bytes.clone()); 76 | 77 | // Make bars visible now, or there would be a delay on the second bar, 78 | // because block compression takes quite some time to finish.
79 | pb_in_bytes.tick(); 80 | pb_out_bytes.tick(); 81 | 82 | let mut builder = MetadataBuilder::new(&root_meta); 83 | let writer = section::Writer::new(fout_pb)?; 84 | let chunker = chunker::BasicChunker::new(writer, builder.block_size(), compress); 85 | let mut chunker = chunker::CdcChunker::new(chunker); 86 | 87 | build_archive(&mut builder, &mut chunker, &cli.input, &pb_in_bytes)?; 88 | 89 | pb_in_bytes.finish(); 90 | pbs.println(format!( 91 | "deduplicated {}", 92 | HumanBytes(chunker.deduplicated_bytes()), 93 | ))?; 94 | 95 | pbs.println("finalizing metadata")?; 96 | let mut w = chunker.finish()?; 97 | w.write_metadata_sections(&builder.finish()?, compress)?; 98 | 99 | pbs.println("waiting for compression to finish")?; 100 | w.finish()?; 101 | pb_out_bytes.finish(); 102 | 103 | let output_len = fout.metadata()?.len(); 104 | 105 | let elapsed = inst.elapsed(); 106 | eprintln!( 107 | "completed in {:?}, with compression ratio {:.2}%", 108 | elapsed, 109 | output_len as f32 / stat.total_bytes as f32 * 100.0, 110 | ); 111 | 112 | Ok(()) 113 | } 114 | 115 | #[derive(Debug, Default)] 116 | struct Stats { 117 | files: u64, 118 | total_bytes: u64, 119 | } 120 | 121 | fn traverse_stats( 122 | root_path: &Path, 123 | stat: &mut Stats, 124 | progress: &ProgressBar, 125 | ) -> std::io::Result<()> { 126 | for ent in fs::read_dir(root_path)? { 127 | let ent = ent?; 128 | let ft = ent.file_type()?; 129 | if ft.is_dir() { 130 | traverse_stats(&ent.path(), stat, progress)?; 131 | } else if ft.is_file() { 132 | stat.files += 1; 133 | stat.total_bytes += fs::symlink_metadata(ent.path())?.len(); 134 | 135 | if stat.files % 1024 == 0 { 136 | progress.set_message(format!( 137 | "found {} files, total {}", 138 | HumanCount(stat.files), 139 | HumanBytes(stat.total_bytes), 140 | )); 141 | } 142 | } 143 | } 144 | Ok(()) 145 | } 146 | 147 | fn build_archive( 148 | meta_builder: &mut MetadataBuilder, 149 | chunker: &mut dyn Chunker, 150 | root_path: &Path, 151 | pb_in_bytes: &ProgressBar, 152 | ) -> dwarfs_enc::Result<()> { 153 | let mut stack = Vec::new(); 154 | stack.push(( 155 | meta_builder.root(), 156 | root_path.to_owned(), 157 | fs::read_dir(root_path)?, 158 | )); 159 | 160 | while let Some(&mut (dir, ref dir_path, ref mut iter)) = stack.last_mut() { 161 | let Some(ent) = iter.next().transpose()? 
else { 162 | stack.pop(); 163 | continue; 164 | }; 165 | 166 | let name = ent.file_name(); 167 | let name_str = name.to_string_lossy(); 168 | if matches!(name_str, Cow::Owned(_)) { 169 | eprintln!("normalized non-UTF-8 name: {name:?} -> {name_str:?}"); 170 | } 171 | let subpath = dir_path.join(&name); 172 | 173 | let ft = ent.file_type()?; 174 | let os_meta = ent.metadata()?; 175 | let inode_meta = InodeMetadata::from(&os_meta); 176 | 177 | if ft.is_dir() { 178 | let subdir = meta_builder.put_dir(dir, &name_str, &inode_meta)?; 179 | let subiter = fs::read_dir(&subpath)?; 180 | stack.push((subdir, subpath, subiter)); 181 | } else if ft.is_file() { 182 | let os_file = fs::File::open(&subpath)?; 183 | let chunks = chunker.put_reader(&mut pb_in_bytes.wrap_read(os_file))?; 184 | meta_builder.put_file(dir, &name_str, &inode_meta, chunks)?; 185 | } else if ft.is_symlink() { 186 | let target = fs::read_link(&subpath)?; 187 | let target_str = target.to_string_lossy(); 188 | if matches!(target_str, Cow::Owned(_)) { 189 | eprintln!("normalized non-UTF-8 symlink target: {target:?} -> {target_str:?}"); 190 | } 191 | meta_builder.put_symlink(dir, &name_str, &inode_meta, &target_str)?; 192 | } else { 193 | eprintln!( 194 | "ignore unsupported file type {:?} for path: {}", 195 | ft, 196 | subpath.display(), 197 | ); 198 | } 199 | } 200 | Ok(()) 201 | } 202 | -------------------------------------------------------------------------------- /dwarfs-enc/src/chunker.rs: -------------------------------------------------------------------------------- 1 | //! File data slicing and/or deduplication. 2 | use std::{ 3 | collections::{HashMap, hash_map::Entry}, 4 | fmt, 5 | io::{Read, Write}, 6 | num::NonZero, 7 | }; 8 | 9 | use dwarfs::section::SectionType; 10 | use rustic_cdc::{Rabin64, RollingHash64}; 11 | use sha2::{Digest, Sha512_256}; 12 | 13 | use crate::{ 14 | Error, Result, 15 | metadata::Chunk, 16 | section::{self, CompressParam}, 17 | }; 18 | 19 | type Chunks = Vec<Chunk>; 20 | 21 | /// Algorithm to slice and/or deduplicate file content. 22 | pub trait Chunker { 23 | /// Put data via a [`Read`] instance into the archive, and return the 24 | /// chunking result ready for [`crate::metadata::Builder::put_file`]. 25 | fn put_reader(&mut self, rdr: &mut dyn Read) -> Result<Chunks>; 26 | 27 | /// Put in-memory data into the archive. 28 | /// 29 | /// This is a shortcut to [`Chunker::put_reader`]. 30 | fn put_bytes(&mut self, mut bytes: &[u8]) -> Result<Chunks> { 31 | self.put_reader(&mut bytes) 32 | } 33 | } 34 | 35 | /// The simplest chunker, which concatenates all files and slices data at block size. 36 | /// 37 | /// This does no deduplication. 38 | pub struct BasicChunker<W> { 39 | buf: Box<[u8]>, 40 | buf_len: usize, 41 | compression: CompressParam, 42 | w: section::Writer<W>, 43 | } 44 | 45 | impl<W> fmt::Debug for BasicChunker<W> { 46 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 47 | f.debug_struct("BasicChunker") 48 | .field("buf", &format_args!("{}/{}", self.buf_len, self.buf.len())) 49 | .field("compression", &self.compression) 50 | .field("w", &self.w) 51 | .finish() 52 | } 53 | } 54 | 55 | impl<W> BasicChunker<W> { 56 | /// Create a basic chunker with given section writer and parameters. 57 | /// 58 | /// Note: `block_size` must match the block size configured for 59 | /// [`crate::metadata::Builder`]. You should always get it from 60 | /// [`crate::metadata::Builder::block_size`].
61 | pub fn new( 62 | w: section::Writer<W>, 63 | block_size: NonZero<u32>, 64 | compression: CompressParam, 65 | ) -> Self { 66 | Self { 67 | buf: vec![0u8; block_size.get() as usize].into_boxed_slice(), 68 | buf_len: 0, 69 | compression, 70 | w, 71 | } 72 | } 73 | 74 | /// Finalize data chunks and get back the underlying section writer. 75 | pub fn finish(mut self) -> Result<section::Writer<W>> 76 | where 77 | W: Write, 78 | { 79 | if self.buf_len != 0 { 80 | self.w.write_section( 81 | SectionType::BLOCK, 82 | self.compression, 83 | &self.buf[..self.buf_len], 84 | )?; 85 | self.buf_len = 0; 86 | } 87 | Ok(self.w) 88 | } 89 | 90 | fn put_reader_inner(&mut self, rdr: &mut dyn Read) -> Result<SeqChunks> 91 | where 92 | W: Write, 93 | { 94 | let mut chunks = SeqChunks { 95 | start_section_idx: self.w.section_count(), 96 | start_offset: self.buf_len as u32, 97 | len: 0, 98 | }; 99 | loop { 100 | while self.buf_len < self.buf.len() { 101 | match rdr.read(&mut self.buf[self.buf_len..]) { 102 | Ok(0) => return Ok(chunks), 103 | Ok(n) => { 104 | self.buf_len += n; 105 | chunks.len += n as u64; 106 | } 107 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue, 108 | Err(err) => return Err(err.into()), 109 | } 110 | } 111 | 112 | debug_assert_eq!(self.buf_len, self.buf.len()); 113 | self.w 114 | .write_section(SectionType::BLOCK, self.compression, &self.buf)?; 115 | self.buf_len = 0; 116 | } 117 | } 118 | } 119 | 120 | #[derive(Debug, Clone, Copy)] 121 | struct SeqChunks { 122 | start_section_idx: u32, 123 | start_offset: u32, 124 | len: u64, 125 | } 126 | 127 | impl SeqChunks { 128 | fn to_chunks(mut self, block_size: u32) -> impl Iterator<Item = Chunk> { 129 | std::iter::from_fn(move || { 130 | let rest_len = block_size - self.start_offset; 131 | if self.len == 0 { 132 | None 133 | } else if self.len <= u64::from(rest_len) { 134 | let c = Chunk { 135 | section_idx: self.start_section_idx, 136 | offset: self.start_offset, 137 | size: self.len as u32, 138 | }; 139 | self.len = 0; 140 | Some(c) 141 | } else { 142 | let c = Chunk { 143 | section_idx: self.start_section_idx, 144 | offset: self.start_offset, 145 | size: rest_len, 146 | }; 147 | self.len -= u64::from(rest_len); 148 | self.start_section_idx += 1; 149 | self.start_offset = 0; 150 | Some(c) 151 | } 152 | }) 153 | } 154 | } 155 | 156 | impl<W: Write> Chunker for BasicChunker<W> { 157 | fn put_reader(&mut self, rdr: &mut dyn Read) -> Result<Chunks> { 158 | let seq = self.put_reader_inner(rdr)?; 159 | Ok(seq.to_chunks(self.buf.len() as u32).collect()) 160 | } 161 | } 162 | 163 | /// The deduplicating chunker using Content Defined Chunking (CDC). 164 | /// 165 | /// The exact algorithm used may change. Currently it uses [rustic_cdc]. 166 | pub struct CdcChunker<W> { 167 | inner: BasicChunker<W>, 168 | // TODO: This struct is too large.
169 | rabin: Rabin64, 170 | chunk_buf: Box<[u8]>, 171 | 172 | table: HashMap<u64, CdcChunk>, 173 | deduplicated_bytes: u64, 174 | } 175 | 176 | impl<W: fmt::Debug> fmt::Debug for CdcChunker<W> { 177 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 178 | f.debug_struct("CdcChunker") 179 | .field("inner", &self.inner) 180 | .field("table_size", &self.table.len()) 181 | .field("deduplicated_bytes", &self.deduplicated_bytes) 182 | .finish_non_exhaustive() 183 | } 184 | } 185 | 186 | struct CdcChunk { 187 | sha256_suffix: [u8; 24], 188 | start_section_idx: u32, 189 | start_offset: u32, 190 | } 191 | 192 | impl<W> CdcChunker<W> { 193 | const WINDOW_SIZE_BITS: u32 = 6; 194 | const WINDOW_SIZE: usize = 1usize << Self::WINDOW_SIZE_BITS; 195 | const CUT_MASK: u64 = (1u64 << 11) - 1; 196 | const MIN_CHUNK_SIZE: usize = Self::WINDOW_SIZE; 197 | const MAX_CHUNK_SIZE: usize = 64 << 10; 198 | 199 | /// Create the deduplicating chunker on top of a [`BasicChunker`]. 200 | pub fn new(inner: BasicChunker<W>) -> Self { 201 | let rabin = Rabin64::new(Self::WINDOW_SIZE_BITS); 202 | CdcChunker { 203 | inner, 204 | rabin, 205 | chunk_buf: vec![0u8; Self::MAX_CHUNK_SIZE].into_boxed_slice(), 206 | table: HashMap::new(), 207 | deduplicated_bytes: 0, 208 | } 209 | } 210 | 211 | /// Get the total deduplicated bytes. 212 | pub fn deduplicated_bytes(&self) -> u64 { 213 | self.deduplicated_bytes 214 | } 215 | 216 | /// Finalize data chunks and get back the underlying section writer. 217 | pub fn finish(self) -> Result<section::Writer<W>> 218 | where 219 | W: Write, 220 | { 221 | self.inner.finish() 222 | } 223 | } 224 | 225 | impl<W: Write> Chunker for CdcChunker<W> { 226 | fn put_reader(&mut self, rdr: &mut dyn Read) -> Result<Chunks> { 227 | let block_size = self.inner.buf.len() as u32; 228 | 229 | let mut chunks = Chunks::new(); 230 | let mut record_chunk = |cdchunk: &[u8]| { 231 | debug_assert_ne!(cdchunk.len(), 0); 232 | 233 | let hash = Sha512_256::new_with_prefix(cdchunk).finalize(); 234 | let (&hash_prefix, hash_suffix) = hash.split_first_chunk::<8>().expect("hash is 32B"); 235 | let hash_suffix: [u8; 24] = hash_suffix.try_into().expect("hash is 32B"); 236 | 237 | let seq = match self.table.entry(u64::from_ne_bytes(hash_prefix)) { 238 | Entry::Vacant(ent) => { 239 | let seq = self.inner.put_reader_inner(&mut { cdchunk })?; 240 | ent.insert(CdcChunk { 241 | sha256_suffix: hash_suffix, 242 | start_section_idx: seq.start_section_idx, 243 | start_offset: seq.start_offset, 244 | }); 245 | seq 246 | } 247 | Entry::Occupied(ent) if ent.get().sha256_suffix == hash_suffix => { 248 | self.deduplicated_bytes += cdchunk.len() as u64; 249 | SeqChunks { 250 | start_section_idx: ent.get().start_section_idx, 251 | start_offset: ent.get().start_offset, 252 | len: cdchunk.len() as u64, 253 | } 254 | } 255 | // Hash prefix collision. 256 | Entry::Occupied(_) => self.inner.put_reader_inner(&mut { cdchunk })?, 257 | }; 258 | 259 | // Merge chunks if possible. 260 | for c in seq.to_chunks(block_size) { 261 | if let Some(p) = chunks 262 | .last_mut() 263 | .filter(|p| (p.section_idx, p.offset + p.size) == (c.section_idx, c.offset)) 264 | { 265 | p.size += c.size; 266 | } else { 267 | chunks.push(c); 268 | } 269 | } 270 | 271 | Ok::<_, Error>(()) 272 | }; 273 | 274 | self.rabin.reset(); 275 | 276 | // | chunk_buf | 277 | // | ...chunk | chunk | partial chunk | next read | ...
| 278 | // ^cut_pos ^end_pos 279 | // ~~~~~~~~~~~ read_len 280 | let mut cut_pos = 0usize; 281 | let mut end_pos = 0usize; 282 | loop { 283 | assert_ne!(end_pos, self.chunk_buf.len()); 284 | let read_len = match rdr.read(&mut self.chunk_buf[end_pos..]) { 285 | Ok(0) => break, 286 | Ok(n) => n, 287 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue, 288 | Err(err) => return Err(err.into()), 289 | }; 290 | 291 | for (&b, pos) in self.chunk_buf[end_pos..end_pos + read_len] 292 | .iter() 293 | .zip(end_pos..) 294 | { 295 | self.rabin.slide(b); 296 | // This is the length of the whole chunk, including previous partial data. 297 | // NB. the current byte at `pos` is included, hence the `+1`. 298 | let len = pos - cut_pos + 1; 299 | 300 | // The `MIN_CHUNK_SIZE` guarantees the sliding window is always filled. 301 | if len >= Self::MIN_CHUNK_SIZE && self.rabin.hash & Self::CUT_MASK == Self::CUT_MASK 302 | || len >= Self::MAX_CHUNK_SIZE 303 | { 304 | let cdchunk = &self.chunk_buf[cut_pos..pos]; 305 | cut_pos = pos; 306 | record_chunk(cdchunk)?; 307 | } 308 | } 309 | end_pos += read_len; 310 | 311 | // Shift down the last partial chunk if we reached the end of the buffer. 312 | // For files smaller than `MAX_CHUNK_SIZE`, this path is never entered. 313 | if end_pos >= self.chunk_buf.len() { 314 | debug_assert_eq!(end_pos, self.chunk_buf.len()); 315 | self.chunk_buf.copy_within(cut_pos.., 0); 316 | end_pos -= cut_pos; 317 | cut_pos = 0; 318 | } 319 | } 320 | 321 | if cut_pos < end_pos { 322 | record_chunk(&self.chunk_buf[cut_pos..end_pos])?; 323 | } 324 | 325 | Ok(chunks) 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /dwarfs-enc/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | /// A `Result` with default error [`Error`]. 4 | pub type Result<T, E = Error> = std::result::Result<T, E>; 5 | 6 | /// An error representing any possible error raised from this crate.
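Returning to the cut condition in `CdcChunker::put_reader` above: with a roughly uniform rolling hash, `hash & CUT_MASK == CUT_MASK` fires with probability `1 / (CUT_MASK + 1)`, so the expected chunk size here is about 2 KiB, clamped between `MIN_CHUNK_SIZE` and `MAX_CHUNK_SIZE`. A toy simulation of that cut-point density (a plain LCG stands in for the crate's `Rabin64`; only rough uniformity matters):

```rust
const MASK: u64 = (1 << 11) - 1; // same value as `CUT_MASK` above

fn main() {
    let mut state = 0x9E37_79B9_7F4A_7C15u64;
    let (mut cuts, total) = (0u64, 1_000_000u64);
    for _ in 0..total {
        // Standard LCG constants (Knuth's MMIX), used here only as a noise source.
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        if (state >> 32) & MASK == MASK {
            cuts += 1;
        }
    }
    // Expect roughly total / (MASK + 1) ≈ 488 cut points, i.e. ~2 KiB chunks.
    println!("cuts: {cuts} (expected ≈ {})", total / (MASK + 1));
}
```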
7 | pub struct Error(Box<ErrorInner>); 8 | 9 | #[derive(Debug)] 10 | #[cfg_attr(not(feature = "default"), allow(dead_code))] 11 | pub(crate) enum ErrorInner { 12 | Limit(&'static str), 13 | SerializeMetadata(dwarfs::metadata::Error), 14 | DuplicatedEntry, 15 | Compress(std::io::Error), 16 | 17 | Io(std::io::Error), 18 | } 19 | 20 | impl fmt::Debug for Error { 21 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 22 | self.0.fmt(f) 23 | } 24 | } 25 | 26 | impl fmt::Display for Error { 27 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 28 | match &*self.0 { 29 | ErrorInner::DuplicatedEntry => f.pad("duplicated entry names in a directory"), 30 | ErrorInner::Limit(msg) => write!(f, "{msg}"), 31 | ErrorInner::SerializeMetadata(err) => err.fmt(f), 32 | ErrorInner::Compress(err) | ErrorInner::Io(err) => err.fmt(f), 33 | } 34 | } 35 | } 36 | 37 | impl std::error::Error for Error { 38 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 39 | match &*self.0 { 40 | ErrorInner::Compress(err) | ErrorInner::Io(err) => Some(err), 41 | ErrorInner::SerializeMetadata(err) => Some(err), 42 | _ => None, 43 | } 44 | } 45 | } 46 | 47 | impl From<ErrorInner> for Error { 48 | #[cold] 49 | fn from(err: ErrorInner) -> Self { 50 | Self(Box::new(err)) 51 | } 52 | } 53 | 54 | impl From<std::io::Error> for Error { 55 | #[cold] 56 | fn from(err: std::io::Error) -> Self { 57 | Self(Box::new(ErrorInner::Io(err))) 58 | } 59 | } 60 | 61 | impl From<dwarfs::metadata::Error> for Error { 62 | #[cold] 63 | fn from(err: dwarfs::metadata::Error) -> Self { 64 | Self(Box::new(ErrorInner::SerializeMetadata(err))) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /dwarfs-enc/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A library for writing [DwarFS][dwarfs] archives (a.k.a. images), 2 | //! building on top of the [`dwarfs` crate][::dwarfs]. 3 | //! 4 | //! For reading archives only, check the [`dwarfs` crate][::dwarfs] instead. 5 | //! 6 | //! [dwarfs]: https://github.com/mhx/dwarfs 7 | //! 8 | //! Currently, this crate writes DwarFS archives with filesystem version v2.5, 9 | //! which should be compatible with upstream dwarfs v0.7.0..=v0.12.4 (latest at 10 | //! the time of writing). 11 | //! 12 | //! ## Examples 13 | //! 14 | //! ``` 15 | //! use dwarfs_enc::{ 16 | //! chunker::{Chunker, BasicChunker, CdcChunker}, 17 | //! metadata::{Builder as MetaBuilder, InodeMetadata}, 18 | //! section::{Writer as SectionWriter, CompressParam}, 19 | //! }; 20 | //! use std::{fs, time::SystemTime}; 21 | //! 22 | //! # fn work() -> dwarfs_enc::Result<()> { 23 | //! let f = fs::File::create("out.dwarfs")?; 24 | //! 25 | //! // Create inode metadata. 26 | //! let mut dir_meta = InodeMetadata::new(0o755); 27 | //! dir_meta.uid(1000).gid(1000).atime(SystemTime::now()); 28 | //! // ... or initialize from OS metadata. 29 | //! let file_meta = InodeMetadata::from(&fs::metadata("./bar")?); 30 | //! 31 | //! // Create a hierarchy builder initialized with a root inode. 32 | //! let mut meta = MetaBuilder::new(&dir_meta); 33 | //! 34 | //! // Use ZSTD compression level 22, and Content Defined Chunking (CDC) for deduplication. 35 | //! let compress = CompressParam::Zstd(22); 36 | //! let writer = SectionWriter::new(f)?; 37 | //! let chunker = BasicChunker::new(writer, meta.block_size(), compress); 38 | //! let mut chunker = CdcChunker::new(chunker); 39 | //! 40 | //! // Put a directory and a symlink. 41 | //! let root = meta.root(); 42 | //! let subdir = meta.put_dir(root, "subdir", &dir_meta)?;
43 | //! meta.put_symlink(subdir, "symlink", &file_meta, "./subdir")?; 44 | //! 45 | //! // Put a regular file, using in-memory data. 46 | //! meta.put_file(root, "foo", &file_meta, chunker.put_bytes(b"hello world")?)?; 47 | //! // Put a regular file, reading from an OS File. 48 | //! let chunks = chunker.put_reader(&mut fs::File::open("bar")?)?; 49 | //! let bar = meta.put_file(root, "bar", &file_meta, chunks)?; 50 | //! 51 | //! // Hard links are also supported. 52 | //! meta.put_hard_link(root, "hardlink", bar)?; 53 | //! 54 | //! // Finalize data chunks, metadata, and the section writer, in that order. 55 | //! let mut writer = chunker.finish()?; 56 | //! writer.write_metadata_sections(&meta.finish()?, compress)?; 57 | //! writer.finish()?; 58 | //! 59 | //! # Ok(()) } 60 | //! ``` 61 | //! 62 | //! See also the simple `mkdwarfs` impl at `./examples/mkdwarfs.rs`. 63 | //! 64 | //! ## Cargo features 65 | //! 66 | //! - `zstd`, `lzma` *(Only `zstd` is enabled by default)* 67 | //! 68 | //! Enable relevant compression algorithm support. `zstd` is the default 69 | //! compression algorithm `mkdwarfs` uses, and it should be enough for most cases. 70 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 71 | #![forbid(unsafe_code)] 72 | #![warn(missing_debug_implementations)] 73 | #![warn(missing_docs)] 74 | mod error; 75 | 76 | pub mod chunker; 77 | pub mod metadata; 78 | mod ordered_parallel; 79 | pub mod section; 80 | 81 | use self::error::ErrorInner; 82 | pub use self::error::{Error, Result}; 83 | -------------------------------------------------------------------------------- /dwarfs-enc/src/metadata.rs: -------------------------------------------------------------------------------- 1 | //! DwarFS archive hierarchy builder. 2 | //! 3 | //! This module provides [`Builder`] to build [`dwarfs::metadata::Metadata`] of 4 | //! a DwarFS archive, which is the spine structure for the directory hierarchy and 5 | //! file chunk information. 6 | //! 7 | //! ## Limitations 8 | //! 9 | //! Due to implementation limitations, the `Metadata` structure cannot exceed 10 | //! 2³² bytes. This also implies that the lengths of all substructures, e.g. the number of 11 | //! files, directories, and chunks, must not exceed 2³². 12 | //! 13 | //! Note that this limitation only applies to `Metadata` itself, not file 14 | //! (chunk) data. The total length of chunks is not limited, as long as it 15 | //! is addressable. E.g. it's possible to have 2¹² files each consisting of 2¹⁹ 16 | //! chunks of 2²⁰ bytes without any issue. 17 | use std::{ 18 | borrow::Cow, 19 | hash::{Hash, Hasher}, 20 | num::NonZero, 21 | time::{Duration, SystemTime}, 22 | }; 23 | 24 | use dwarfs::metadata; 25 | use indexmap::IndexSet; 26 | 27 | use crate::{Error, ErrorInner, Result}; 28 | 29 | // These values are stored on disk, thus should be platform-agnostic. 30 | // But `rustix` does not expose them on non-UNIX platforms yet. 31 | // TODO: Maybe define them in `dwarfs`? 32 | // From: 33 | const S_IFSOCK: u32 = 0o0140000; 34 | const S_IFLNK: u32 = 0o0120000; 35 | const S_IFREG: u32 = 0o0100000; 36 | const S_IFBLK: u32 = 0o0060000; 37 | const S_IFDIR: u32 = 0o0040000; 38 | const S_IFCHR: u32 = 0o0020000; 39 | const S_IFIFO: u32 = 0o0010000; 40 | 41 | /// Metadata construction configurations.
42 | #[derive(Debug, Clone)] 43 | pub struct Config { 44 | block_size: NonZero<u32>, 45 | mtime_only: bool, 46 | time_resolution_sec: NonZero<u32>, 47 | source_date_epoch: u64, 48 | creator: Option<Cow<'static, str>>, 49 | created_timestamp: Option<u64>, 50 | } 51 | 52 | impl Default for Config { 53 | fn default() -> Self { 54 | Self { 55 | block_size: NonZero::new(16 << 20).expect("not zero"), 56 | mtime_only: false, 57 | time_resolution_sec: NonZero::new(1).expect("not zero"), 58 | source_date_epoch: u64::MAX, 59 | creator: Some(Cow::Borrowed(Self::DEFAULT_CREATOR_VERSION)), 60 | created_timestamp: None, 61 | } 62 | } 63 | } 64 | 65 | impl Config { 66 | const DEFAULT_CREATOR_VERSION: &str = 67 | concat!(env!("CARGO_PKG_NAME"), " ", env!("CARGO_PKG_VERSION")); 68 | 69 | /// Set the block size of this archive. 70 | /// 71 | /// Default value is 16 MiB. 72 | /// 73 | /// Each [`BLOCK` section][dwarfs::section::SectionType::BLOCK] must have 74 | /// this size (before compression) except for the last one. 75 | /// 76 | /// # Panics 77 | /// 78 | /// Panics if `bytes` is not a power of two. 79 | pub fn block_size(&mut self, bytes: NonZero<u32>) -> &mut Self { 80 | assert!(bytes.is_power_of_two()); 81 | self.block_size = bytes; 82 | self 83 | } 84 | 85 | /// Only store file modification time (mtime) and ignore access (atime) or 86 | /// change (ctime) times. 87 | /// 88 | /// Default value is `false`. 89 | /// 90 | /// This will cause all access and change times to be ignored, and will set 91 | /// a flag in metadata informing their unavailability. 92 | pub fn mtime_only(&mut self, yes: bool) -> &mut Self { 93 | self.mtime_only = yes; 94 | self 95 | } 96 | 97 | /// Set the minimum resolution of all file times. 98 | /// 99 | /// Default value is 1 second, which is also the minimal possible value. 100 | /// 101 | /// A resolution other than one second causes all file times to be truncated to 102 | /// the largest multiple of the resolution not greater than the original value. 103 | pub fn time_resolution_sec(&mut self, sec: NonZero<u32>) -> &mut Self { 104 | self.time_resolution_sec = sec; 105 | self 106 | } 107 | 108 | /// Set the [`SOURCE_DATE_EPOCH`](https://reproducible-builds.org/specs/source-date-epoch/) 109 | /// which clamps all timestamps later than it down to it. 110 | pub fn source_date_epoch(&mut self, timestamp: u64) -> &mut Self { 111 | self.source_date_epoch = timestamp; 112 | self.clamp_timestamp(); 113 | self 114 | } 115 | 116 | /// Set a custom string indicating the name and version of the creator program. 117 | /// 118 | /// Default value is 119 | #[doc = concat!("`\"", env!("CARGO_PKG_NAME"), " ", env!("CARGO_PKG_VERSION"), "\"`.")] 120 | pub fn creator(&mut self, info: impl Into<Option<Cow<'static, str>>>) -> &mut Self { 121 | self.creator = info.into(); 122 | self 123 | } 124 | 125 | /// Set a timestamp indicating the archive creation time. 126 | /// 127 | /// The value will be clamped by [`Config::source_date_epoch`] if both are set. 128 | /// 129 | /// Default value is `None`. 130 | pub fn created_timestamp(&mut self, ts: impl Into<Option<u64>>) -> &mut Self { 131 | self.created_timestamp = ts.into(); 132 | self.clamp_timestamp(); 133 | self 134 | } 135 | 136 | fn clamp_timestamp(&mut self) { 137 | if let Some(t) = &mut self.created_timestamp { 138 | *t = self.source_date_epoch.min(*t); 139 | } 140 | } 141 | } 142 | 143 | /// The metadata builder. 144 | /// 145 | /// See [module-level documentation][self].
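A minimal sketch of wiring a `Config` into a `Builder`, using only the APIs defined in this file:

```rust
use dwarfs_enc::metadata::{Builder, Config, InodeMetadata};
use std::num::NonZero;

fn make_builder() -> Builder {
    let mut cfg = Config::default();
    cfg.block_size(NonZero::new(1 << 20).expect("not zero")) // 1 MiB; must be a power of two
        .mtime_only(true) // drop atime/ctime
        .source_date_epoch(1_700_000_000); // clamp timestamps for reproducible output
    // The root directory inode is created implicitly from this metadata.
    Builder::new_with_config(&cfg, &InodeMetadata::new(0o755))
}
```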
146 | #[derive(Debug)] 147 | pub struct Builder { 148 | config: Config, 149 | 150 | inodes: Vec<InodeData>, 151 | dir_entries: IndexSet<DirEntry>, 152 | chunks: Vec<Chunk>, 153 | file_chunk_start: Vec<u32>, 154 | /// A symlink does not store its target (index) in the inode data; it is looked up 155 | /// through an indirect table keyed by its inode. 156 | symlink_target_idxs: Vec<u32>, 157 | devices: Vec<u64>, 158 | 159 | // TODO: Optimize memory footprint of these small strings. 160 | name_table: IndexSet<String>, 161 | symlink_table: IndexSet<String>, 162 | 163 | modes: IndexSet<u32>, 164 | uids: IndexSet<u32>, 165 | gids: IndexSet<u32>, 166 | } 167 | 168 | impl Builder { 169 | /// Create a builder with default configurations. 170 | pub fn new(root_meta: &InodeMetadata) -> Self { 171 | Self::new_with_config(&Config::default(), root_meta) 172 | } 173 | 174 | /// Create a builder with custom configurations. 175 | pub fn new_with_config(config: &Config, root_meta: &InodeMetadata) -> Self { 176 | let mut this = Self { 177 | config: config.clone(), 178 | inodes: Default::default(), 179 | dir_entries: Default::default(), 180 | chunks: Default::default(), 181 | file_chunk_start: Default::default(), 182 | symlink_target_idxs: Default::default(), 183 | devices: Default::default(), 184 | name_table: Default::default(), 185 | symlink_table: Default::default(), 186 | modes: Default::default(), 187 | uids: Default::default(), 188 | gids: Default::default(), 189 | }; 190 | this.put_inode(S_IFDIR, InodeKind::Dir, root_meta) 191 | .expect("no overflow"); 192 | // NB. The self-link of the root directory is handled in `finish`. 193 | // We do not want to check duplicates against the special (0, 0, 0) link. 194 | this 195 | } 196 | 197 | /// Get the configured block size. 198 | pub fn block_size(&self) -> NonZero<u32> { 199 | self.config.block_size 200 | } 201 | 202 | /// Get the implicitly created root directory. 203 | #[inline] 204 | pub fn root(&self) -> DirId { 205 | DirId(0) 206 | } 207 | 208 | fn put_inode(&mut self, file_type: u32, kind: InodeKind, meta: &InodeMetadata) -> Result<u32> { 209 | let ino = u32::try_from(self.inodes.len()) 210 | .ok() 211 | .ok_or(ErrorInner::Limit("inode count exceeds 2^32"))?; 212 | 213 | let cvt_time = |time: SystemTime| { 214 | let timestamp = time 215 | .duration_since(SystemTime::UNIX_EPOCH) 216 | .map_err(|_| ErrorInner::Limit("timestamp before UNIX epoch is unsupported"))? 217 | .as_secs(); 218 | let multiples = timestamp.min(self.config.source_date_epoch) 219 | / u64::from(self.config.time_resolution_sec.get()); 220 | u32::try_from(multiples) 221 | .map_err(|_| Error::from(ErrorInner::Limit("relative timestamp exceeds 2^32"))) 222 | }; 223 | let mtime_offset = cvt_time(meta.mtime)?; 224 | let (atime_offset, ctime_offset) = if self.config.mtime_only { 225 | (0, 0) 226 | } else { 227 | (cvt_time(meta.atime)?, cvt_time(meta.ctime)?)
228 | }; 229 | 230 | let mode = file_type | meta.mode_without_type; 231 | let mode_idx = self.modes.insert_full(mode).0 as u32; 232 | let uid_idx = self.uids.insert_full(meta.uid).0 as u32; 233 | let gid_idx = self.gids.insert_full(meta.gid).0 as u32; 234 | 235 | self.inodes.push(InodeData { 236 | kind, 237 | orig_ino: ino, 238 | mode_idx, 239 | uid_idx, 240 | gid_idx, 241 | mtime_offset, 242 | atime_offset, 243 | ctime_offset, 244 | }); 245 | 246 | Ok(ino) 247 | } 248 | 249 | fn put_entry_inner(&mut self, parent: DirId, name: &str, child: u32) -> Result<()> { 250 | u32::try_from(self.dir_entries.len()) 251 | .ok() 252 | .ok_or(ErrorInner::Limit("directory entry count exceeds 2^32"))?; 253 | let name_idx = self.name_table.insert_full(name.into()).0 as u32; 254 | let (_, inserted) = self.dir_entries.insert_full(DirEntry { 255 | parent: parent.0, 256 | name_idx, 257 | child, 258 | }); 259 | if !inserted { 260 | return Err(ErrorInner::DuplicatedEntry.into()); 261 | } 262 | Ok(()) 263 | } 264 | 265 | /// Add an empty directory under a directory. 266 | /// 267 | /// # Errors 268 | /// 269 | /// Returns `Err` if any of the following holds: 270 | /// 271 | /// - Inode count overflows. 272 | /// - Directory entry count overflows. 273 | /// - There is already an entry with the same name in the directory. 274 | #[inline] 275 | pub fn put_dir(&mut self, parent: DirId, name: &str, meta: &InodeMetadata) -> Result<DirId> { 276 | let ino = self.put_inode(S_IFDIR, InodeKind::Dir, meta)?; 277 | self.put_entry_inner(parent, name, ino)?; 278 | Ok(DirId(ino)) 279 | } 280 | 281 | /// Add a hard link to an existing inode under a directory. 282 | /// 283 | /// # Errors 284 | /// 285 | /// See [`Builder::put_dir`]. 286 | pub fn put_hard_link( 287 | &mut self, 288 | parent: DirId, 289 | name: &str, 290 | inode: impl Into<LinkableInodeId>, 291 | ) -> Result<()> { 292 | self.put_entry_inner(parent, name, inode.into().0) 293 | } 294 | 295 | /// Add a regular file under a directory. 296 | /// 297 | /// # Panics 298 | /// 299 | /// Panics if any chunk has an end offset (`offset + size`) exceeding [`Config::block_size`]. 300 | /// 301 | /// # Errors 302 | /// 303 | /// See [`Builder::put_dir`]. 304 | pub fn put_file( 305 | &mut self, 306 | parent: DirId, 307 | name: &str, 308 | meta: &InodeMetadata, 309 | chunks: impl IntoIterator<Item = Chunk>, 310 | ) -> Result<FileId> { 311 | let chunk_start = self.chunks.len() as u32; 312 | self.chunks.extend(chunks); 313 | u32::try_from(self.chunks.len()) 314 | .ok() 315 | .ok_or(ErrorInner::Limit("file chunk count exceeds 2^32"))?; 316 | if let Some(c) = self.chunks[chunk_start as usize..].iter().find(|c| { 317 | c.offset 318 | .checked_add(c.size) 319 | .is_none_or(|end| end > self.config.block_size.get()) 320 | }) { 321 | panic!( 322 | "invalid chunk for block size {}B: {:?}", 323 | self.config.block_size, c, 324 | ); 325 | } 326 | let ino = self.put_inode(S_IFREG, InodeKind::UniqueFile, meta)?; 327 | self.file_chunk_start.push(chunk_start); 328 | self.put_entry_inner(parent, name, ino)?; 329 | Ok(FileId(ino)) 330 | } 331 | 332 | /// Add a symbolic link (symlink) under a directory. 333 | /// 334 | /// # Errors 335 | /// 336 | /// See [`Builder::put_dir`].
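The duplicate-name check in `put_entry_inner` above is observable through any of the `put_*` methods; a small sketch:

```rust
use dwarfs_enc::metadata::{Builder, InodeMetadata};

fn main() {
    let meta = InodeMetadata::new(0o755);
    let mut b = Builder::new(&meta);
    let root = b.root();
    b.put_dir(root, "x", &meta).unwrap();
    // Same name under the same parent: rejected with a `DuplicatedEntry` error.
    assert!(b.put_dir(root, "x", &meta).is_err());
}
```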
337 | #[inline] 338 | pub fn put_symlink( 339 | &mut self, 340 | parent: DirId, 341 | name: &str, 342 | meta: &InodeMetadata, 343 | target: &str, 344 | ) -> Result<LinkableInodeId> { 345 | let ino = self.put_inode(S_IFLNK, InodeKind::Symlink, meta)?; 346 | let tgt_idx = self.symlink_table.insert_full(target.into()).0 as u32; 347 | self.symlink_target_idxs.push(tgt_idx); 348 | self.put_entry_inner(parent, name, ino)?; 349 | Ok(LinkableInodeId(ino)) 350 | } 351 | 352 | /// Add a block device inode under a directory. 353 | /// 354 | /// # Errors 355 | /// 356 | /// See [`Builder::put_dir`]. 357 | #[inline] 358 | pub fn put_block_device( 359 | &mut self, 360 | parent: DirId, 361 | name: &str, 362 | meta: &InodeMetadata, 363 | device_id: u64, 364 | ) -> Result<LinkableInodeId> { 365 | let ino = self.put_inode(S_IFBLK, InodeKind::Device, meta)?; 366 | self.devices.push(device_id); 367 | self.put_entry_inner(parent, name, ino)?; 368 | Ok(LinkableInodeId(ino)) 369 | } 370 | 371 | /// Add a character device inode under a directory. 372 | /// 373 | /// # Errors 374 | /// 375 | /// See [`Builder::put_dir`]. 376 | #[inline] 377 | pub fn put_char_device( 378 | &mut self, 379 | parent: DirId, 380 | name: &str, 381 | meta: &InodeMetadata, 382 | device_id: u64, 383 | ) -> Result<LinkableInodeId> { 384 | let ino = self.put_inode(S_IFCHR, InodeKind::Device, meta)?; 385 | self.devices.push(device_id); 386 | self.put_entry_inner(parent, name, ino)?; 387 | Ok(LinkableInodeId(ino)) 388 | } 389 | 390 | /// Add a FIFO (named pipe) inode under a directory. 391 | /// 392 | /// # Errors 393 | /// 394 | /// See [`Builder::put_dir`]. 395 | #[inline] 396 | pub fn put_fifo( 397 | &mut self, 398 | parent: DirId, 399 | name: &str, 400 | meta: &InodeMetadata, 401 | ) -> Result<LinkableInodeId> { 402 | let ino = self.put_inode(S_IFIFO, InodeKind::Ipc, meta)?; 403 | self.put_entry_inner(parent, name, ino)?; 404 | Ok(LinkableInodeId(ino)) 405 | } 406 | 407 | /// Add a socket inode under a directory. 408 | /// 409 | /// # Errors 410 | /// 411 | /// See [`Builder::put_dir`]. 412 | #[inline] 413 | pub fn put_socket( 414 | &mut self, 415 | parent: DirId, 416 | name: &str, 417 | meta: &InodeMetadata, 418 | ) -> Result<LinkableInodeId> { 419 | let ino = self.put_inode(S_IFSOCK, InodeKind::Ipc, meta)?; 420 | self.put_entry_inner(parent, name, ino)?; 421 | Ok(LinkableInodeId(ino)) 422 | } 423 | 424 | // TODO: FSST compressor. 425 | fn build_string_table( 426 | tbl: IndexSet<String>, 427 | err_msg: &'static str, 428 | ) -> Result<Option<metadata::StringTable>> { 429 | if tbl.is_empty() { 430 | return Ok(None); 431 | } 432 | 433 | let mut out = metadata::StringTable::default(); 434 | // Delta indices. 435 | out.packed_index = true; 436 | 437 | let total_len = tbl.iter().map(|s| s.len()).sum::<usize>(); 438 | u32::try_from(total_len) 439 | .ok() 440 | .ok_or(ErrorInner::Limit(err_msg))?; 441 | out.buffer.reserve(total_len); 442 | 443 | // NB. For `packed_index`, the first zero should be omitted. 444 | out.index.reserve(tbl.len()); 445 | for s in tbl { 446 | out.buffer.extend_from_slice(s.as_bytes()); 447 | out.index.push(s.len() as u32); 448 | } 449 | 450 | Ok(Some(out)) 451 | } 452 | 453 | /// Finalize and construct the result [`Metadata`][dwarfs::metadata::Metadata]. 454 | /// 455 | /// # Errors 456 | /// 457 | /// Returns `Err` if the hierarchy is invalid, or exceeds certain limitations, 458 | /// including but not limited to: 459 | /// - Duplicated entry names in a directory. 460 | /// - Any (intermediate) low-level structure exceeds 2³² bytes. 461 | /// See [module-level documentation][self].
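For intuition about the packed layout `build_string_table` above produces: with `packed_index`, each index entry stores a string's length (the leading zero of the cumulative form is omitted), and a reader recovers offsets by a prefix sum. A plain-std illustration of the reading side, not crate API:

```rust
fn main() {
    let strings = ["foo", "ab", "quux"];
    let buffer: String = strings.concat(); // "fooabquux"
    let packed: Vec<u32> = strings.iter().map(|s| s.len() as u32).collect(); // [3, 2, 4]

    // Recover the i-th string: its start is the sum of all lengths before it.
    let mut start = 0usize;
    for (len, s) in packed.iter().zip(strings) {
        let end = start + *len as usize;
        assert_eq!(&buffer[start..end], s);
        start = end;
    }
}
```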
462 | pub fn finish(mut self) -> Result<metadata::Metadata> { 463 | let mut out = metadata::Metadata::default(); 464 | let opts = out.options.insert(metadata::FsOptions::default()); 465 | 466 | //// Configurables //// 467 | 468 | opts.mtime_only = self.config.mtime_only; 469 | opts.time_resolution_sec = (self.config.time_resolution_sec.get() != 1) 470 | .then_some(self.config.time_resolution_sec.get()); 471 | // TODO: Pack more fields if possible. 472 | 473 | //// Inodes //// 474 | 475 | // Stable sort to keep relative order unchanged. It is important to keep 476 | // the topological order of directories. 477 | self.inodes.sort_by_key(|inode| inode.kind as u8); 478 | let orig_ino_to_final = { 479 | let mut map = vec![0u32; self.inodes.len()]; 480 | for (final_ino, inode) in self.inodes.iter().enumerate() { 481 | map[inode.orig_ino as usize] = final_ino as u32; 482 | } 483 | map 484 | }; 485 | 486 | out.inodes = self 487 | .inodes 488 | .iter() 489 | .map(|inode| { 490 | let mut data = metadata::InodeData::default(); 491 | data.mode_index = inode.mode_idx; 492 | data.owner_index = inode.uid_idx; 493 | data.group_index = inode.gid_idx; 494 | data.atime_offset = inode.atime_offset; 495 | data.mtime_offset = inode.mtime_offset; 496 | data.ctime_offset = inode.ctime_offset; 497 | data 498 | }) 499 | .collect(); 500 | 501 | //// Directory and entries //// 502 | 503 | let dir_cnt = self 504 | .inodes 505 | .iter() 506 | .take_while(|data| data.kind == InodeKind::Dir) 507 | .count(); 508 | assert_ne!(dir_cnt, 0, "root exists"); 509 | 510 | // Directory relative order is kept unchanged because of the stable sort above. 511 | // So this will sort `dir_entries` to the final order. 512 | // Note that `dir_entries[0]` is the self-link for the root directory. 513 | let mut dir_entries = std::iter::once(DirEntry { 514 | parent: 0, 515 | child: 0, 516 | // This index is unused. 517 | name_idx: 0, 518 | }) 519 | .chain(self.dir_entries) 520 | .collect::<Vec<_>>(); 521 | dir_entries[1..] 522 | .sort_by_key(|ent| (ent.parent, &self.name_table[ent.name_idx as usize][..])); 523 | // Checked on inserting entries. 524 | debug_assert!( 525 | dir_entries[1..] 526 | .windows(2) 527 | .all(|w| (w[0].parent, w[0].name_idx) != (w[1].parent, w[1].name_idx)) 528 | ); 529 | 530 | // Initialize directory links. 531 | { 532 | // One more sentinel element. 533 | out.directories = vec![Default::default(); dir_cnt + 1]; 534 | 535 | // Skip the 0-th root directory, which should be kept zero-initialized. 536 | let mut offset = 1u32; 537 | for (final_ino, inode) in self.inodes[..dir_cnt].iter().enumerate() { 538 | let dir = &mut out.directories[final_ino]; 539 | dir.first_entry = offset; 540 | // For child directories of root, this is the default 0, as expected. 541 | // For other directories, this should already be initialized by 542 | // the entry traversal of its parent entries, because of 543 | // the topological order enforced by the APIs. 544 | let parent_entry = dir.self_entry; 545 | 546 | // Update parent links of child directories. 547 | while let Some(ent) = dir_entries 548 | .get(offset as usize) 549 | .filter(|ent| ent.parent == inode.orig_ino) 550 | { 551 | let child_final_ino = orig_ino_to_final[ent.child as usize] as usize; 552 | if let Some(subdir) = out.directories.get_mut(child_final_ino) { 553 | subdir.self_entry = offset; 554 | subdir.parent_entry = parent_entry; 555 | } 556 | offset += 1; 557 | } 558 | } 559 | debug_assert_eq!(offset as usize, dir_entries.len()); 560 | 561 | // Sentinel.
562 | out.directories.last_mut().unwrap().first_entry = dir_entries.len() as u32; 563 | } 564 | 565 | out.dir_entries = Some( 566 | dir_entries 567 | .into_iter() 568 | .map(|ent| { 569 | let mut out = metadata::DirEntry::default(); 570 | out.name_index = ent.name_idx; 571 | out.inode_num = orig_ino_to_final[ent.child as usize]; 572 | out 573 | }) 574 | .collect(), 575 | ); 576 | 577 | //// String tables //// 578 | 579 | out.compact_names = 580 | Self::build_string_table(self.name_table, "total file name length exceeds 2^32")?; 581 | out.compact_symlinks = 582 | Self::build_string_table(self.symlink_table, "total symlink length exceeds 2^32")?; 583 | 584 | //// Trivial fields //// 585 | 586 | out.block_size = self.config.block_size.get(); 587 | out.total_fs_size = 0; // Not strictly necessary; only for human readers. 588 | out.dwarfs_version = self.config.creator.map(|s| String::from(s).into()); 589 | out.create_timestamp = self.config.created_timestamp; 590 | 591 | out.symlink_table = self.symlink_target_idxs; 592 | out.modes = self.modes.into_iter().collect(); 593 | out.uids = self.uids.into_iter().collect(); 594 | out.gids = self.gids.into_iter().collect(); 595 | out.devices = (!self.devices.is_empty()).then_some(self.devices); 596 | 597 | out.chunk_table = self.file_chunk_start; 598 | // Sentinel. 599 | out.chunk_table.push(self.chunks.len() as u32); 600 | 601 | out.chunks = self 602 | .chunks 603 | .into_iter() 604 | .map(|chunk| { 605 | let mut data = metadata::Chunk::default(); 606 | data.block = chunk.section_idx; 607 | data.offset = chunk.offset; 608 | data.size = chunk.size; 609 | data 610 | }) 611 | .collect(); 612 | 613 | Ok(out) 614 | } 615 | } 616 | 617 | #[derive(Debug)] 618 | struct DirEntry { 619 | parent: u32, 620 | name_idx: u32, 621 | child: u32, 622 | } 623 | 624 | // Hash and Eq impls are only on the `(parent, name_idx)` pair, because we want to 625 | // check that entry names in a directory do not duplicate. 626 | impl Hash for DirEntry { 627 | fn hash<H: Hasher>(&self, h: &mut H) { 628 | h.write_u64(u64::from(self.parent) | u64::from(self.name_idx) << 32); 629 | } 630 | } 631 | impl PartialEq for DirEntry { 632 | fn eq(&self, other: &Self) -> bool { 633 | (self.parent, self.name_idx) == (other.parent, other.name_idx) 634 | } 635 | } 636 | impl Eq for DirEntry {} 637 | 638 | /// The location of a chunk of data for a regular file. 639 | /// 640 | /// Usually, you should use [`crate::chunker::Chunker`]s to slice file data into 641 | /// [`Chunk`]s and copy data at the same time, rather than manually constructing 642 | /// them. 643 | /// 644 | /// For details about data chunking and the meaning of fields, check the 645 | /// [upstream documentation](https://github.com/mhx/dwarfs/blob/v0.12.4/doc/dwarfs-format.md). 646 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 647 | pub struct Chunk { 648 | /// The section index. 649 | pub section_idx: u32, 650 | /// The byte offset inside the section. 651 | pub offset: u32, 652 | /// The size of the chunk. 653 | pub size: u32, 654 | } 655 | 656 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 657 | enum InodeKind { 658 | // NB. The order matters for sorting. 659 | // It should match the DwarFS inode type order. 660 | Dir, 661 | Symlink, 662 | UniqueFile, 663 | // TODO: SharedFile 664 | Device, 665 | Ipc, 666 | } 667 | 668 | #[derive(Debug)] 669 | struct InodeData { 670 | kind: InodeKind, 671 | // To maintain the mapping after sorting inodes by their kinds.
672 | orig_ino: u32, 673 | 674 | mode_idx: u32, 675 | uid_idx: u32, 676 | gid_idx: u32, 677 | mtime_offset: u32, 678 | atime_offset: u32, 679 | ctime_offset: u32, 680 | } 681 | 682 | /// The metadata of an inode. 683 | #[derive(Debug, Clone)] 684 | pub struct InodeMetadata { 685 | mode_without_type: u32, 686 | uid: u32, 687 | gid: u32, 688 | mtime: SystemTime, 689 | atime: SystemTime, 690 | ctime: SystemTime, 691 | } 692 | 693 | impl From<&std::fs::Metadata> for InodeMetadata { 694 | fn from(meta: &std::fs::Metadata) -> Self { 695 | #[cfg(unix)] 696 | use std::os::unix::fs::MetadataExt; 697 | 698 | #[cfg(unix)] 699 | let mode = meta.mode() & 0o777; 700 | #[cfg(not(unix))] 701 | let mode = if meta.is_dir() { 0o755 } else { 0o644 }; 702 | 703 | let mut ret = InodeMetadata::new(mode); 704 | if let Ok(mtime) = meta.modified() { 705 | ret.mtime(mtime); 706 | } 707 | if let Ok(atime) = meta.accessed() { 708 | ret.atime(atime); 709 | } 710 | 711 | #[cfg(unix)] 712 | { 713 | let ctime = meta.ctime(); 714 | let ctime = if ctime >= 0 { 715 | SystemTime::UNIX_EPOCH + Duration::from_secs(ctime as u64) 716 | } else { 717 | SystemTime::UNIX_EPOCH - Duration::from_secs(-ctime as u64) 718 | }; 719 | ret.ctime(ctime).uid(meta.uid()).gid(meta.gid()); 720 | } 721 | 722 | ret 723 | } 724 | } 725 | 726 | impl InodeMetadata { 727 | /// Create a default metadata with the given [file mode][mode]. 728 | /// 729 | /// [mode]: https://man.archlinux.org/man/inode.7.en#The_file_type_and_mode 730 | pub const fn new(mode_without_type: u32) -> Self { 731 | assert!( 732 | mode_without_type & !0o777 == 0, 733 | "`mode_without_type` should only have 0o777 bits set", 734 | ); 735 | Self { 736 | mode_without_type, 737 | uid: 0, 738 | gid: 0, 739 | mtime: SystemTime::UNIX_EPOCH, 740 | atime: SystemTime::UNIX_EPOCH, 741 | ctime: SystemTime::UNIX_EPOCH, 742 | } 743 | } 744 | 745 | /// Set the owner numeric id. 746 | /// 747 | /// If unset, it defaults to `0` (root). 748 | pub fn uid(&mut self, uid: u32) -> &mut Self { 749 | self.uid = uid; 750 | self 751 | } 752 | 753 | /// Set the owner group numeric id. 754 | /// 755 | /// If unset, it defaults to `0` (root). 756 | pub fn gid(&mut self, gid: u32) -> &mut Self { 757 | self.gid = gid; 758 | self 759 | } 760 | 761 | /// Set the modification time (mtime). 762 | /// 763 | /// If unset, it defaults to [`SystemTime::UNIX_EPOCH`]. 764 | pub fn mtime(&mut self, timestamp: SystemTime) -> &mut Self { 765 | self.mtime = timestamp; 766 | self 767 | } 768 | 769 | /// Set the access time (atime). 770 | /// 771 | /// If unset, it defaults to [`SystemTime::UNIX_EPOCH`]. 772 | /// If [`Config::mtime_only`] is set, this value is ignored. 773 | pub fn atime(&mut self, timestamp: SystemTime) -> &mut Self { 774 | self.atime = timestamp; 775 | self 776 | } 777 | 778 | /// Set the change time (ctime). 779 | /// 780 | /// If unset, it defaults to [`SystemTime::UNIX_EPOCH`]. 781 | /// If [`Config::mtime_only`] is set, this value is ignored. 782 | pub fn ctime(&mut self, timestamp: SystemTime) -> &mut Self { 783 | self.ctime = timestamp; 784 | self 785 | } 786 | } 787 | 788 | /// A handle to a directory inode. 789 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 790 | pub struct DirId(u32); 791 | 792 | /// A handle to an inode that is allowed to be hard-linked. 793 | /// 794 | /// All inodes except directories are linkable. 795 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 796 | pub struct LinkableInodeId(u32); 797 | 798 | /// A handle to a regular file inode. 799 |
799 | /// 800 | /// This type implements `Into`. 801 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 802 | pub struct FileId(u32); 803 | 804 | impl From for LinkableInodeId { 805 | fn from(i: FileId) -> Self { 806 | Self(i.0) 807 | } 808 | } 809 | -------------------------------------------------------------------------------- /dwarfs-enc/src/ordered_parallel.rs: -------------------------------------------------------------------------------- 1 | //! Run tasks in parallel while keeping the original order. 2 | 3 | use std::{fmt, num::NonZero, panic, thread}; 4 | 5 | use crossbeam_channel as mpmc; 6 | 7 | pub struct OrderedParallel { 8 | injector: Option>>, 9 | collector: mpmc::Receiver>, 10 | next_to_send: usize, 11 | next_to_recv: usize, 12 | ring_buf: Box<[Option]>, 13 | 14 | threads: Box<[thread::JoinHandle<()>]>, 15 | } 16 | 17 | impl fmt::Debug for OrderedParallel { 18 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 19 | f.debug_struct("OrderedParallel") 20 | .field("next_to_send", &self.next_to_send) 21 | .field("next_to_recv", &self.next_to_recv) 22 | .field("ring_buf_size", &self.ring_buf.len()) 23 | .field("threads_cnt", &self.threads.len()) 24 | .finish_non_exhaustive() 25 | } 26 | } 27 | 28 | type Task = (usize, Box R + Send>); 29 | type TaskResult = (usize, thread::Result); 30 | 31 | impl Drop for OrderedParallel { 32 | fn drop(&mut self) { 33 | self.injector = None; 34 | let worker_panicked = std::mem::take(&mut self.threads) 35 | .into_iter() 36 | .fold(false, |panicked, j| panicked | j.join().is_err()); 37 | if worker_panicked && !thread::panicking() { 38 | panic!("worker panicked"); 39 | } 40 | } 41 | } 42 | 43 | impl OrderedParallel { 44 | pub fn new(thread_name: &str, thread_cnt: NonZero) -> std::io::Result { 45 | // Random picked: 1.5x. 46 | let max_inflights = thread_cnt.saturating_add(thread_cnt.get().div_ceil(2)); 47 | 48 | let (injector, injector_rx) = mpmc::bounded(max_inflights.get()); 49 | let (collector_tx, collector) = mpmc::bounded(max_inflights.get()); 50 | 51 | let threads = (0..thread_cnt.get()) 52 | .map(|idx| { 53 | let injector_rx = injector_rx.clone(); 54 | let collector_tx = collector_tx.clone(); 55 | std::thread::Builder::new() 56 | .name(format!("{thread_name}-{idx}")) 57 | .spawn(|| Self::worker(injector_rx, collector_tx)) 58 | }) 59 | .collect::>>()?; 60 | 61 | let ring_buf = std::iter::repeat_with(|| None) 62 | .take(max_inflights.get()) 63 | .collect(); 64 | 65 | Ok(Self { 66 | next_to_send: 0, 67 | next_to_recv: 0, 68 | 69 | ring_buf, 70 | 71 | injector: Some(injector), 72 | threads, 73 | collector, 74 | }) 75 | } 76 | 77 | fn worker(injector: mpmc::Receiver>, collector: mpmc::Sender>) { 78 | while let Ok((index, task)) = injector.recv() { 79 | let ret = panic::catch_unwind(panic::AssertUnwindSafe(task)); 80 | if collector.send((index, ret)).is_err() { 81 | break; 82 | } 83 | } 84 | } 85 | 86 | /// Spawn a new task and retrieve some completed tasks. 87 | /// 88 | /// You should always drain the returning iterator, or the behavior is unspecified. 
89 | #[must_use = "iterator must be drained"] 90 | pub fn submit_and_get<F>(&mut self, task: F) -> impl Iterator<Item = R> 91 | where 92 | F: FnOnce() -> R + Send + 'static, 93 | { 94 | let index = self.next_to_send; 95 | self.next_to_send += 1; 96 | if self.next_to_send == self.ring_buf.len() { 97 | self.next_to_send = 0; 98 | } 99 | self.send_and_recv_inner((index, Box::new(task))) 100 | } 101 | 102 | fn send_and_recv_inner(&mut self, task: Task<R>) -> impl Iterator<Item = R> { 103 | let injector = self.injector.as_ref().expect("channel closed"); 104 | // Blocking wait for the bottlenecked task if the next send would overflow. 105 | // Note that we ensure `ring_buf.len() >= 2` so the first send never waits. 106 | if self.next_to_send == self.next_to_recv { 107 | while self.ring_buf[self.next_to_recv].is_none() { 108 | Self::process_ret( 109 | self.collector.recv().expect("channel closed"), 110 | &mut self.ring_buf, 111 | ); 112 | } 113 | } 114 | 115 | injector.try_send(task).expect("channel is not full"); 116 | while let Ok(ret) = self.collector.try_recv() { 117 | Self::process_ret(ret, &mut self.ring_buf); 118 | } 119 | 120 | Self::received_iter(&mut self.next_to_recv, &mut self.ring_buf) 121 | } 122 | 123 | fn process_ret((idx, ret): TaskResult<R>, ring_buf: &mut [Option<R>]) { 124 | let v = match ret { 125 | Ok(v) => v, 126 | Err(_err) => panic!("task panicked"), 127 | }; 128 | assert!(ring_buf[idx].is_none(), "completion buffer overflowed"); 129 | ring_buf[idx] = Some(v); 130 | } 131 | 132 | fn received_iter( 133 | next_to_recv: &mut usize, 134 | ring_buf: &mut [Option<R>], 135 | ) -> impl Iterator<Item = R> { 136 | std::iter::from_fn(|| { 137 | let elem = ring_buf[*next_to_recv].take()?; 138 | *next_to_recv += 1; 139 | if *next_to_recv == ring_buf.len() { 140 | *next_to_recv = 0; 141 | } 142 | Some(elem) 143 | }) 144 | } 145 | 146 | /// Block and receive some completed results. 147 | /// 148 | /// Returns `None` if the channel is closed and all results are drained. 149 | pub fn wait_and_get(&mut self) -> Option<impl Iterator<Item = R>> { 150 | while self.ring_buf[self.next_to_recv].is_none() { 151 | let ret = self.collector.recv().ok()?; 152 | Self::process_ret(ret, &mut self.ring_buf); 153 | } 154 | Some(Self::received_iter( 155 | &mut self.next_to_recv, 156 | &mut self.ring_buf, 157 | )) 158 | } 159 | 160 | /// Signal the end of tasks. Stop all workers. 161 | pub fn stop(&mut self) { 162 | self.injector = None; 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /dwarfs-enc/src/section.rs: -------------------------------------------------------------------------------- 1 | //! DwarFS section writer. 2 | use std::io::Write; 3 | use std::num::NonZero; 4 | 5 | use dwarfs::section::{CompressAlgo, Header, MagicVersion, SectionIndexEntry, SectionType}; 6 | use dwarfs::zerocopy::IntoBytes; 7 | use zerocopy::FromBytes; 8 | 9 | use crate::ordered_parallel::OrderedParallel; 10 | use crate::{ErrorInner, Result}; 11 | 12 | /// The section compression parameter. 13 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 14 | #[non_exhaustive] 15 | pub enum CompressParam { 16 | /// No compression. 17 | None, 18 | /// Compress with a given ZSTD level. Requires feature `zstd`. 19 | #[cfg(feature = "zstd")] 20 | Zstd(zstd_safe::CompressionLevel), 21 | /// Compress with a given LZMA (a.k.a. xz) level. Requires feature `lzma`. 22 | #[cfg(feature = "lzma")] 23 | Lzma(u32), 24 | } 25 | 26 | /// DwarFS section writer.
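For intuition about the 2⁴⁸ limit enforced by `IndexBuilder` below: a section index entry packs a type tag together with a 48-bit archive offset into a single `u64`. The exact bit layout belongs to the `dwarfs` crate; this sketch only illustrates the arithmetic of such a split:

```rust
fn main() {
    let (typ, offset) = (7u64, 123_456u64);
    assert!(offset < 1u64 << 48); // offsets must fit in 48 bits
    let packed = typ << 48 | offset; // tag in the high 16 bits
    assert_eq!(packed >> 48, typ);
    assert_eq!(packed & ((1u64 << 48) - 1), offset);
}
```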
27 | #[derive(Debug)] 28 | pub struct Writer<W: ?Sized> { 29 | workers: OrderedParallel<Result<Vec<u8>>>, 30 | /// The total number of sections initiated, including ones that are not written yet. 31 | initiated_section_count: u32, 32 | index: IndexBuilder, 33 | 34 | w: W, 35 | } 36 | 37 | #[derive(Debug, Default)] 38 | struct IndexBuilder { 39 | index: Vec<SectionIndexEntry>, 40 | next_offset: u64, 41 | } 42 | 43 | impl IndexBuilder { 44 | fn push(&mut self, typ: SectionType, sec_raw_len: usize) -> Result<()> { 45 | let ent = SectionIndexEntry::new(typ, self.next_offset).expect("checked by last write"); 46 | self.next_offset = u64::try_from(sec_raw_len) 47 | .ok() 48 | .and_then(|l| l.checked_add(self.next_offset)) 49 | .filter(|&n| n < 1u64 << 48) 50 | .ok_or(ErrorInner::Limit("archive size exceeds 2^48 bytes"))?; 51 | self.index.push(ent); 52 | Ok(()) 53 | } 54 | } 55 | 56 | impl<W> Writer<W> { 57 | /// Create a default multi-threaded section writer. 58 | pub fn new(w: W) -> std::io::Result<Self> { 59 | let thread_cnt = std::thread::available_parallelism()?; 60 | Self::new_with_threads(w, thread_cnt) 61 | } 62 | 63 | /// Create a section writer with specific parallelism. 64 | pub fn new_with_threads(w: W, thread_cnt: NonZero<usize>) -> std::io::Result<Self> { 65 | let workers = OrderedParallel::new("compressor", thread_cnt)?; 66 | Ok(Self { 67 | workers, 68 | initiated_section_count: 0, 69 | index: IndexBuilder::default(), 70 | w, 71 | }) 72 | } 73 | } 74 | 75 | impl<W: ?Sized> Writer<W> { 76 | /// Get a reference to the underlying writer. 77 | pub fn get_ref(&self) -> &W { 78 | &self.w 79 | } 80 | 81 | /// Get a mutable reference to the underlying writer. 82 | pub fn get_mut(&mut self) -> &mut W { 83 | &mut self.w 84 | } 85 | 86 | /// Retrieve the ownership of the underlying writer. 87 | pub fn into_inner(self) -> W 88 | where 89 | W: Sized, 90 | { 91 | self.w 92 | } 93 | } 94 | 95 | impl<W: Write + 'static> Writer<W> { 96 | /// Number of sections initiated via `write_section`. 97 | #[must_use] 98 | pub fn section_count(&self) -> u32 { 99 | // Checked by `write_section` not to overflow u32. 100 | self.initiated_section_count 101 | } 102 | 103 | /// Finalize and seal the DwarFS archive. 104 | pub fn finish(mut self) -> Result<W> { 105 | // Wait for all preceding sections to complete, so their offsets are recorded. 106 | self.workers.stop(); 107 | while let Some(iter) = self.workers.wait_and_get() { 108 | Self::commit_completed(iter, &mut self.w, &mut self.index)?; 109 | } 110 | 111 | // The last length is unused. 112 | let index_byte_len = self.index.index.as_bytes().len() + size_of::<SectionIndexEntry>(); 113 | self.index 114 | .push(SectionType::SECTION_INDEX, index_byte_len)?; 115 | let sec = Self::seal_section( 116 | self.section_count(), 117 | SectionType::SECTION_INDEX, 118 | CompressParam::None, 119 | self.index.index.as_bytes(), 120 | )?; 121 | self.w.write_all(&sec)?; 122 | 123 | Ok(self.w) 124 | } 125 | 126 | fn commit_completed( 127 | completed: impl Iterator<Item = Result<Vec<u8>>>, 128 | w: &mut W, 129 | index: &mut IndexBuilder, 130 | ) -> Result<()> { 131 | for ret in completed { 132 | let sec = ret?; 133 | let off = std::mem::offset_of!(Header, section_type); 134 | let typ = SectionType::read_from_prefix(&sec[off..]).unwrap().0; 135 | w.write_all(&sec)?; 136 | index.push(typ, sec.len())?; 137 | } 138 | Ok(()) 139 | } 140 | 141 | /// Write a section with the given (uncompressed) payload. 142 | pub fn write_section( 143 | &mut self, 144 | section_type: SectionType, 145 | compression: CompressParam, 146 | payload: &[u8], 147 | ) -> Result<()> { 148 | // Should not happen for current machines.
149 | assert!(u64::try_from(size_of::<Header>
() + payload.len()).is_ok()); 150 | 151 | let section_number = self.section_count(); 152 | self.initiated_section_count = self 153 | .initiated_section_count 154 | .checked_add(1) 155 | .ok_or(ErrorInner::Limit("section count exceeds 2^32"))?; 156 | 157 | let payload = payload.to_vec(); 158 | Self::commit_completed( 159 | self.workers.submit_and_get(move || { 160 | Self::seal_section(section_number, section_type, compression, &payload) 161 | }), 162 | &mut self.w, 163 | &mut self.index, 164 | ) 165 | } 166 | 167 | /// Compress payload if possible, calculate hashes and fill the section header. 168 | fn seal_section( 169 | section_number: u32, 170 | section_type: SectionType, 171 | compression: CompressParam, 172 | payload: &[u8], 173 | ) -> Result<Vec<u8>> { 174 | let mut buf = vec![0u8; size_of::<Header>
() + payload.len()]; 175 | #[cfg_attr(not(feature = "default"), allow(unused_labels))] 176 | let (compress_algo, compressed_len) = 'compressed: { 177 | let compressed_buf = &mut buf[size_of::<Header>
()..]; 178 | match compression { 179 | CompressParam::None => {} 180 | 181 | #[cfg(feature = "zstd")] 182 | #[expect(non_upper_case_globals, reason = "name from C")] 183 | CompressParam::Zstd(lvl) => { 184 | // See: 185 | const ZSTD_error_dstSize_tooSmall: zstd_safe::ErrorCode = -70isize as usize; 186 | 187 | match zstd_safe::compress(compressed_buf, payload, lvl) { 188 | Ok(compressed_len) => { 189 | assert!(compressed_len <= payload.len()); 190 | break 'compressed (CompressAlgo::ZSTD, compressed_len); 191 | } 192 | Err(ZSTD_error_dstSize_tooSmall) => {} 193 | Err(code) => { 194 | let err = std::io::Error::new( 195 | std::io::ErrorKind::InvalidInput, 196 | format!( 197 | "ZSTD compression failed (code={}): {}", 198 | code, 199 | zstd_safe::get_error_name(code), 200 | ), 201 | ); 202 | return Err(ErrorInner::Compress(err).into()); 203 | } 204 | } 205 | } 206 | 207 | #[cfg(feature = "lzma")] 208 | CompressParam::Lzma(lvl) => { 209 | if let Some(compressed_len) = (|| { 210 | use liblzma::stream::{Action, Check, Status, Stream}; 211 | 212 | // The default parameters used by `liblzma::bufread::XzEncoder::new`. 213 | // See: 214 | let mut encoder = Stream::new_easy_encoder(lvl, Check::Crc64)?; 215 | 216 | match encoder.process(payload, compressed_buf, Action::Run)? { 217 | // Treat partial consumption as buffer-too-small. 218 | Status::Ok if encoder.total_in() == payload.len() as u64 => {} 219 | Status::Ok | Status::MemNeeded => return Ok(None), 220 | Status::StreamEnd | Status::GetCheck => unreachable!(), 221 | } 222 | match encoder.process( 223 | &[], 224 | &mut compressed_buf[encoder.total_out() as usize..], 225 | Action::Finish, 226 | )? { 227 | Status::StreamEnd => {} 228 | Status::MemNeeded => return Ok(None), 229 | Status::Ok | Status::GetCheck => unreachable!(), 230 | } 231 | 232 | Ok::<_, std::io::Error>(Some(encoder.total_out() as usize)) 233 | })() 234 | .map_err(ErrorInner::Compress)? 235 | { 236 | break 'compressed (CompressAlgo::LZMA, compressed_len); 237 | } 238 | } 239 | } 240 | compressed_buf.copy_from_slice(payload); 241 | (CompressAlgo::NONE, payload.len()) 242 | }; 243 | buf.truncate(size_of::<Header>
() + compressed_len); 244 | let (header_buf, compressed_buf) = buf.split_at_mut(size_of::<Header>
()); 245 | 246 | let mut header = Header { 247 | magic_version: MagicVersion::LATEST, 248 | slow_hash: [0u8; 32], 249 | fast_hash: [0u8; 8], 250 | section_number: section_number.into(), 251 | section_type, 252 | compress_algo, 253 | payload_size: 0.into(), 254 | }; 255 | header.update_size_and_checksum(compressed_buf); 256 | header_buf.copy_from_slice(header.as_bytes()); 257 | 258 | Ok(buf) 259 | } 260 | 261 | /// Write metadata sections `METADATA_V2{,_SCHEMA}`. 262 | pub fn write_metadata_sections( 263 | &mut self, 264 | metadata: &dwarfs::metadata::Metadata, 265 | compression: CompressParam, 266 | ) -> Result<()> { 267 | let (schema, metadata_bytes) = metadata.to_schema_and_bytes()?; 268 | let schema_bytes = schema.to_bytes()?; 269 | self.write_section(SectionType::METADATA_V2_SCHEMA, compression, &schema_bytes)?; 270 | self.write_section(SectionType::METADATA_V2, compression, &metadata_bytes) 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /dwarfs-enc/tests/basic.rs: -------------------------------------------------------------------------------- 1 | use dwarfs::{Archive, AsChunks, InodeKind, archive::IsInode}; 2 | use dwarfs_enc::{ 3 | chunker::{BasicChunker, Chunker}, 4 | metadata::{Builder, InodeMetadata}, 5 | section::{CompressParam, Writer}, 6 | }; 7 | 8 | const META: InodeMetadata = InodeMetadata::new(0o777); 9 | 10 | fn build_with(f: impl FnOnce(&mut Builder, &mut dyn Chunker) -> dwarfs_enc::Result<()>) -> Vec<u8> { 11 | let buf = Vec::new(); 12 | let writer = Writer::new(buf).unwrap(); 13 | let mut builder = Builder::new(&META); 14 | let mut chunker = BasicChunker::new(writer, builder.block_size(), CompressParam::Zstd(3)); 15 | f(&mut builder, &mut chunker).unwrap(); 16 | let mut writer = chunker.finish().unwrap(); 17 | writer 18 | .write_metadata_sections(&builder.finish().unwrap(), CompressParam::Zstd(3)) 19 | .unwrap(); 20 | writer.finish().unwrap() 21 | } 22 | 23 | #[test] 24 | fn empty() { 25 | let b = build_with(|_meta, _chunker| Ok(())); 26 | let (index, _archive) = Archive::new(b).unwrap(); 27 | assert_eq!(index.root().entries().len(), 0); 28 | assert_eq!(index.inodes().len(), 1); 29 | assert_eq!(index.directories().len(), 1); 30 | } 31 | 32 | #[test] 33 | fn smoke() { 34 | let b = build_with(|meta, _chunker| { 35 | let root = meta.root(); 36 | meta.put_dir(root, "0dir", &META)?; 37 | let f = meta.put_file(root, "1file", &META, [])?; 38 | meta.put_symlink(root, "2symlink", &META, "target")?; 39 | meta.put_block_device(root, "3blkdev", &META, 0xDEAD_BEEF_DEAD_BEEF)?; 40 | meta.put_char_device(root, "4chardev", &META, 0xBEEF_DEAD_BEEF_DEAD)?; 41 | meta.put_fifo(root, "5fifo", &META)?; 42 | meta.put_socket(root, "6socket", &META)?; 43 | meta.put_hard_link(root, "7hardlink", f)?; 44 | Ok(()) 45 | }); 46 | 47 | let (index, _archive) = Archive::new(b).unwrap(); 48 | let (children, names) = index 49 | .root() 50 | .entries() 51 | .map(|ent| (ent.inode().classify(), ent.name())) 52 | .unzip::<_, _, Vec<_>, Vec<_>>(); 53 | 54 | assert_eq!( 55 | names, 56 | vec![ 57 | "0dir", 58 | "1file", 59 | "2symlink", 60 | "3blkdev", 61 | "4chardev", 62 | "5fifo", 63 | "6socket", 64 | "7hardlink", 65 | ] 66 | ); 67 | 68 | assert!(matches!(children[0], InodeKind::Directory(_))); 69 | assert!(matches!(children[1], InodeKind::File(i) if i.as_chunks().len() == 0)); 70 | assert!(matches!(children[2], InodeKind::Symlink(i) if i.target() == "target")); 71 | assert!(matches!(children[3], InodeKind::Device(i) if i.device_id() ==
0xDEAD_BEEF_DEAD_BEEF)); 72 | assert!(matches!(children[4], InodeKind::Device(i) if i.device_id() == 0xBEEF_DEAD_BEEF_DEAD)); 73 | assert!(matches!(children[5], InodeKind::Ipc(_))); 74 | assert!(matches!(children[6], InodeKind::Ipc(_))); 75 | assert_eq!(children[7].inode_num(), children[1].inode_num()); 76 | } 77 | -------------------------------------------------------------------------------- /dwarfs-test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dwarfs-test" 3 | version = "0.0.0" 4 | edition = "2024" 5 | publish = false 6 | license = "MIT OR Apache-2.0" 7 | 8 | [dependencies] 9 | dwarfs = { path = "../dwarfs", features = ["serialize", "lzma", "lz4"] } 10 | env_logger = "0.11.8" 11 | tempfile = "3.20.0" 12 | xshell = "0.2.7" 13 | 14 | [target.'cfg(unix)'.dependencies] 15 | # use-libc is necessary to support fakeroot. 16 | rustix = { version = "1.0.7", features = ["fs", "process", "use-libc"] } 17 | 18 | [dev-dependencies] 19 | hex = "0.4.3" 20 | sha2 = "0.10.9" 21 | -------------------------------------------------------------------------------- /dwarfs-test/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /dwarfs-test/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /dwarfs-test/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod mtree; 2 | pub mod traverse; 3 | -------------------------------------------------------------------------------- /dwarfs-test/src/main.rs: -------------------------------------------------------------------------------- 1 | use xshell::{Shell, cmd}; 2 | 3 | fn main() { 4 | env_logger::init(); 5 | let args = std::env::args().collect::<Vec<_>>(); 6 | let output = match &*args { 7 | [_, cmd, output] if cmd == "gen-privileged-archive" => output, 8 | _ => panic!("invalid argument"), 9 | }; 10 | 11 | #[cfg(not(unix))] 12 | { 13 | let _ = output; 14 | panic!("only UNIX platforms are supported"); 15 | } 16 | 17 | // Used by `../tests/tests/rs`.
18 | #[cfg(unix)] 19 | { 20 | use rustix::fs as ufs; 21 | 22 | assert!( 23 | rustix::process::geteuid().is_root(), 24 | "gen-privileged-archive must be executed under root or 'fakeroot'", 25 | ); 26 | 27 | let sh = Shell::new().unwrap(); 28 | let temp_dir = tempfile::tempdir().expect("failed to create tempdir"); 29 | let src_path = temp_dir.path().join("root"); 30 | std::fs::create_dir(&src_path).unwrap(); 31 | ufs::mknodat( 32 | ufs::ABS, 33 | src_path.join("bdev"), 34 | ufs::FileType::BlockDevice, 35 | ufs::Mode::from_bits_truncate(0o777), 36 | 0x0123_4567_89AB_CDEF, 37 | ) 38 | .unwrap(); 39 | ufs::mknodat( 40 | ufs::ABS, 41 | src_path.join("cdev"), 42 | ufs::FileType::CharacterDevice, 43 | ufs::Mode::from_bits_truncate(0o777), 44 | 0xFEDC_BA98_7654_3210, 45 | ) 46 | .unwrap(); 47 | 48 | cmd!( 49 | sh, 50 | "mkdwarfs -i {src_path} -o {output} --no-progress --log-level=error --with-devices" 51 | ) 52 | .run() 53 | .expect("failed to run 'mkdwarfs'"); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /dwarfs-test/src/mtree.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Result, Write}; 2 | 3 | use dwarfs::{ArchiveIndex, AsChunks, Dir, InodeKind}; 4 | 5 | pub fn dump(w: &mut dyn Write, index: &ArchiveIndex) -> Result<()> { 6 | writeln!(w, "#mtree")?; 7 | dump_dir(w, index.root(), &mut String::from(".")) 8 | } 9 | 10 | /// mtree escapes '/' and non-printable chars as `\ooo`. 11 | /// See: 12 | fn escape_into(buf: &mut String, s: &str) { 13 | for &b in s.as_bytes() { 14 | // ASCII printables. 15 | if (33..=126).contains(&b) && !b"\\/#".contains(&b) { 16 | buf.push(b as char); 17 | } else { 18 | buf.push('\\'); 19 | let digit = |x: u8| (b'0' + x) as char; 20 | buf.push(digit(b / 64)); 21 | buf.push(digit(b / 8 % 8)); 22 | buf.push(digit(b % 8)); 23 | } 24 | } 25 | } 26 | 27 | fn dump_dir(w: &mut dyn Write, dir: Dir<'_>, path: &mut String) -> Result<()> { 28 | for only_dir in [false, true] { 29 | for ent in dir.entries() { 30 | let name = ent.name(); 31 | let ino = ent.inode(); 32 | let prev_len = path.len(); 33 | path.push('/'); 34 | escape_into(path, name); 35 | 36 | let meta = ino.metadata(); 37 | let mtime = meta.mtime(); 38 | let mode = meta.file_type_mode().permission_bits(); 39 | let gid = meta.gid(); 40 | let uid = meta.uid(); 41 | if let Some(d) = ino.as_dir() { 42 | if only_dir { 43 | writeln!( 44 | w, 45 | "{path} time={mtime}.0 mode={mode:03o} gid={gid} uid={uid} type=dir", 46 | )?; 47 | dump_dir(w, d, path)?; 48 | } 49 | } else if !only_dir { 50 | if let Some(f) = ino.as_file() { 51 | let size = f.as_chunks().total_size(); 52 | writeln!( 53 | w, 54 | "{path} time={mtime}.0 mode={mode:03o} gid={gid} uid={uid} type=file size={size}", 55 | )?; 56 | } else { 57 | let kind = ino.classify(); 58 | if let InodeKind::Symlink(sym) = kind { 59 | let tgt = sym.target(); 60 | writeln!( 61 | w, 62 | "{path} time={mtime}.0 mode={mode:03o} gid={gid} uid={uid} type=link link={tgt}", 63 | )?; 64 | } else { 65 | unimplemented!("{kind:?}"); 66 | } 67 | } 68 | } 69 | 70 | path.truncate(prev_len); 71 | } 72 | } 73 | Ok(()) 74 | } 75 | -------------------------------------------------------------------------------- /dwarfs-test/src/traverse.rs: -------------------------------------------------------------------------------- 1 | use dwarfs::{ArchiveIndex, AsChunks, File}; 2 | 3 | /// Traverse all files in file offset order, for efficient content access. 
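A worked example of the `\ooo` escaping implemented in `escape_into` above: each escaped byte is emitted as three octal digits.

```rust
fn main() {
    let b = b'/'; // 0x2F = 0o057; '/' must always be escaped in mtree paths
    assert_eq!((b / 64, b / 8 % 8, b % 8), (0, 5, 7)); // the three octal digits
    // So a name like "a/b" is emitted as "a\057b".
}
```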
4 | pub fn traverse_files(index: &ArchiveIndex) -> Vec<(String, File<'_>)> { 5 | let mut files = Vec::with_capacity(index.inodes().len() - index.directories().len()); 6 | let mut queue = Vec::new(); 7 | queue.push((String::new(), index.root())); 8 | 9 | while let Some((mut path, dir)) = queue.pop() { 10 | path.push('/'); 11 | let prev_len = path.len(); 12 | 13 | for ent in dir.entries() { 14 | let name = ent.name(); 15 | let ino = ent.inode(); 16 | path.push_str(name); 17 | 18 | if let Some(d) = ino.as_dir() { 19 | queue.push((path.clone(), d)); 20 | } else if let Some(f) = ino.as_file() { 21 | let start_sec_idx = f.as_chunks().next().map_or(0, |c| c.section_idx()); 22 | files.push((start_sec_idx, path.clone(), f)); 23 | } 24 | 25 | path.truncate(prev_len); 26 | } 27 | } 28 | 29 | files.sort_by_key(|(sec_idx, ..)| *sec_idx); 30 | 31 | files.into_iter().map(|(_, path, f)| (path, f)).collect() 32 | } 33 | -------------------------------------------------------------------------------- /dwarfs-test/tests/basic.rs: -------------------------------------------------------------------------------- 1 | //! Basic functionality tests. 2 | use std::{ 3 | fs, 4 | io::BufRead, 5 | path::{Path, PathBuf}, 6 | time::{Duration, SystemTime}, 7 | }; 8 | 9 | use dwarfs::{ 10 | Archive, AsChunks, 11 | archive::{Config, IsInode, SectionIndexStrategy}, 12 | }; 13 | use xshell::{Shell, TempDir, cmd}; 14 | 15 | fn debug_print_mtree(sh: &Shell, archive_path: &Path) { 16 | let mtree_out = cmd!( 17 | sh, 18 | "dwarfsextract -i {archive_path} -f mtree --log-level=error" 19 | ) 20 | .read() 21 | .unwrap(); 22 | eprintln!("{mtree_out}"); 23 | } 24 | 25 | #[track_caller] 26 | fn build_archive(sh: &Shell, out: &str, opts: &str) -> PathBuf { 27 | let opts = opts.split_ascii_whitespace(); 28 | cmd!( 29 | sh, 30 | "mkdwarfs -i ./root -o {out} --no-progress --log-level=error {opts...}" 31 | ) 32 | .run() 33 | .unwrap(); 34 | debug_print_mtree(sh, out.as_ref()); 35 | sh.current_dir().join(out) 36 | } 37 | 38 | fn new_temp_shell() -> (Shell, TempDir) { 39 | let sh = Shell::new().unwrap(); 40 | let temp_dir = sh.create_temp_dir().unwrap(); 41 | sh.change_dir(temp_dir.path()); 42 | (sh, temp_dir) 43 | } 44 | 45 | #[test] 46 | fn empty() { 47 | let (sh, _temp_dir) = new_temp_shell(); 48 | sh.create_dir("root").unwrap(); 49 | let archive_path = build_archive(&sh, "img.dwarfs", "--set-time=42"); 50 | let (index, _archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 51 | 52 | let root = index.root(); 53 | assert_eq!(root.inode_num(), 0); 54 | assert_eq!(index.inodes().len(), 1); 55 | assert_eq!(index.inodes().next().unwrap().inode_num(), root.inode_num()); 56 | assert_eq!(index.directories().len(), 1); 57 | assert_eq!( 58 | index.directories().next().unwrap().inode_num(), 59 | root.inode_num() 60 | ); 61 | 62 | assert_eq!(root.entries().len(), 0); 63 | assert!(root.get("").is_none()); 64 | 65 | assert_eq!(index.get_inode(0).unwrap().inode_num(), 0); 66 | 67 | assert_eq!( 68 | index 69 | .get_path(std::iter::empty::<&str>()) 70 | .unwrap() 71 | .inode_num(), 72 | root.inode_num() 73 | ); 74 | 75 | let meta = root.metadata(); 76 | assert_eq!(meta.mtime(), 42); 77 | assert_eq!(meta.atime(), None); 78 | assert_eq!(meta.ctime(), None); 79 | #[cfg(unix)] 80 | assert_eq!( 81 | meta.file_type_mode().type_bits(), 82 | rustix::fs::FileType::Directory.as_raw_mode(), 83 | ); 84 | } 85 | 86 | #[test] 87 | fn basics() { 88 | let (sh, _temp_dir) = new_temp_shell(); 89 | sh.create_dir("root").unwrap(); 90 | 
sh.create_dir("root/dir1").unwrap(); 91 | sh.create_dir("root/dir2").unwrap(); 92 | sh.write_file("root/dir2/foo.txt", "bar").unwrap(); 93 | sh.write_file("root/empty", "").unwrap(); 94 | fs::File::open(sh.current_dir().join("root/dir2/foo.txt")) 95 | .unwrap() 96 | .set_times( 97 | fs::FileTimes::new() 98 | .set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(42)) 99 | .set_accessed(SystemTime::UNIX_EPOCH + Duration::from_secs(666)), 100 | ) 101 | .unwrap(); 102 | let archive_path = build_archive(&sh, "img.dwarfs", "--keep-all-times"); 103 | 104 | let (index, mut archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 105 | let root = index.root(); 106 | assert_eq!( 107 | root.entries().map(|ent| ent.name()).collect::>(), 108 | ["dir1", "dir2", "empty"], 109 | ); 110 | 111 | let dir1 = root.get("dir1").unwrap().inode().as_dir().unwrap(); 112 | assert_eq!(dir1.entries().len(), 0); 113 | 114 | let empty = root.get("empty").unwrap().inode().as_file().unwrap(); 115 | assert_eq!(empty.read_to_vec(&mut archive).unwrap(), []); 116 | assert_eq!(empty.as_chunks().len(), 0); 117 | assert_eq!(empty.as_chunks().total_size(), 0); 118 | assert_eq!(empty.as_reader(&mut archive).total_size(), 0); 119 | assert_eq!(empty.as_reader(&mut archive).fill_buf().unwrap(), []); 120 | 121 | let dir2 = root.get("dir2").unwrap().inode().as_dir().unwrap(); 122 | let foo = dir2.get("foo.txt").unwrap().inode(); 123 | let foo2 = index.get_path(["dir2", "foo.txt"]).unwrap(); 124 | assert_eq!(foo.inode_num(), foo2.inode_num()); 125 | let foo = foo.as_file().unwrap(); 126 | 127 | assert_eq!(foo.as_chunks().len(), 1); 128 | assert_eq!(foo.as_chunks().total_size(), 3); 129 | assert_eq!( 130 | foo.as_chunks() 131 | .next() 132 | .unwrap() 133 | .read_cached(&mut archive) 134 | .unwrap(), 135 | b"bar" 136 | ); 137 | assert_eq!(foo.read_to_vec(&mut archive).unwrap(), b"bar"); 138 | 139 | let meta = foo.metadata(); 140 | assert_eq!(meta.mtime(), 42); 141 | assert_eq!(meta.atime(), Some(666)); 142 | assert!(meta.ctime().is_some()); 143 | #[cfg(unix)] 144 | assert_eq!( 145 | meta.file_type_mode().type_bits(), 146 | rustix::fs::FileType::RegularFile.as_raw_mode(), 147 | ); 148 | } 149 | 150 | #[cfg(unix)] 151 | #[test] 152 | fn unix_specials() { 153 | use dwarfs::InodeKind; 154 | use rustix::fs::{self as ufs, FileType, Mode}; 155 | 156 | let (sh, _temp_dir) = new_temp_shell(); 157 | let src_path = sh.create_dir("root").unwrap(); 158 | ufs::symlink("/absolute/path", src_path.join("abs")).unwrap(); 159 | ufs::symlink("/absolute/path", src_path.join("dup")).unwrap(); 160 | ufs::symlink("../relative/path", src_path.join("rel")).unwrap(); 161 | 162 | // Do not mask. We make assertions on permissions below. 
163 | rustix::process::umask(Mode::empty()); 164 | 165 | ufs::mkdir( 166 | src_path.join("sticky"), 167 | Mode::RWXU | Mode::XOTH | Mode::SVTX, 168 | ) 169 | .unwrap(); 170 | ufs::mknodat( 171 | ufs::ABS, 172 | src_path.join("pipe"), 173 | FileType::Fifo, 174 | Mode::RWXU | Mode::SUID, 175 | 0, 176 | ) 177 | .unwrap(); 178 | ufs::mknodat( 179 | ufs::ABS, 180 | src_path.join("sock"), 181 | FileType::Socket, 182 | Mode::RWXG | Mode::SGID, 183 | 0, 184 | ) 185 | .unwrap(); 186 | 187 | let archive_path = build_archive(&sh, "img.dwarfs", "--with-specials"); 188 | let (index, _archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 189 | let root = index.root(); 190 | 191 | assert!(matches!(root.get("abs").unwrap().inode().classify(), 192 | InodeKind::Symlink(f) if f.target() == "/absolute/path")); 193 | assert!(matches!(root.get("dup").unwrap().inode().classify(), 194 | InodeKind::Symlink(f) if f.target() == "/absolute/path")); 195 | assert!(matches!(root.get("rel").unwrap().inode().classify(), 196 | InodeKind::Symlink(f) if f.target() == "../relative/path")); 197 | 198 | let sticky = root.get("sticky").unwrap().inode().as_dir().unwrap(); 199 | let sticky_mode = sticky.metadata().file_type_mode(); 200 | assert_eq!( 201 | FileType::from_raw_mode(sticky_mode.type_bits()), 202 | FileType::Directory 203 | ); 204 | assert_eq!( 205 | Mode::from_bits(sticky_mode.mode_bits()), 206 | Some(Mode::RWXU | Mode::XOTH | Mode::SVTX) 207 | ); 208 | 209 | let pipe = root.get("pipe").unwrap().inode(); 210 | let pipe_mode = pipe.metadata().file_type_mode(); 211 | assert!(matches!(pipe.classify(), InodeKind::Ipc(_))); 212 | assert_eq!( 213 | FileType::from_raw_mode(pipe_mode.type_bits()), 214 | FileType::Fifo, 215 | ); 216 | assert_eq!( 217 | Mode::from_bits(pipe_mode.mode_bits()), 218 | Some(Mode::RWXU | Mode::SUID) 219 | ); 220 | 221 | let sock = root.get("sock").unwrap().inode(); 222 | let sock_mode = sock.metadata().file_type_mode(); 223 | assert!(matches!(sock.classify(), InodeKind::Ipc(_))); 224 | assert_eq!( 225 | FileType::from_raw_mode(sock_mode.type_bits()), 226 | FileType::Socket, 227 | ); 228 | assert_eq!( 229 | Mode::from_bits(sock_mode.mode_bits()), 230 | Some(Mode::RWXG | Mode::SGID) 231 | ); 232 | } 233 | 234 | #[cfg(unix)] 235 | #[test] 236 | fn unix_devices() { 237 | use dwarfs::InodeKind; 238 | use rustix::fs::FileType; 239 | 240 | let (sh, _temp_dir) = new_temp_shell(); 241 | let exe = env!("CARGO_BIN_EXE_dwarfs-test"); 242 | cmd!(sh, "fakeroot -- {exe} gen-privileged-archive img.dwarfs") 243 | .run() 244 | .unwrap(); 245 | let archive_path = sh.current_dir().join("img.dwarfs"); 246 | debug_print_mtree(&sh, &archive_path); 247 | 248 | let (index, _archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 249 | let root = index.root(); 250 | 251 | let bdev = root.get("bdev").unwrap().inode(); 252 | let InodeKind::Device(bdev) = bdev.classify() else { 253 | panic!("wrong file type") 254 | }; 255 | assert_eq!(bdev.device_id(), 0x0123_4567_89AB_CDEF); 256 | assert_eq!( 257 | FileType::from_raw_mode(bdev.metadata().file_type_mode().type_bits()), 258 | FileType::BlockDevice, 259 | ); 260 | 261 | let cdev = root.get("cdev").unwrap().inode(); 262 | let InodeKind::Device(cdev) = cdev.classify() else { 263 | panic!("wrong file type") 264 | }; 265 | assert_eq!(cdev.device_id(), 0xFEDC_BA98_7654_3210); 266 | assert_eq!( 267 | FileType::from_raw_mode(cdev.metadata().file_type_mode().type_bits()), 268 | FileType::CharacterDevice, 269 | ); 270 | } 271 | 272 | #[test] 273 | fn 
section_index() { 274 | let (sh, _temp_dir) = new_temp_shell(); 275 | sh.create_dir("root").unwrap(); 276 | 277 | let load = |f: &Path, strategy: SectionIndexStrategy| { 278 | Archive::new_with_config( 279 | fs::File::open(f).unwrap(), 280 | Config::default().section_index_strategy(strategy), 281 | ) 282 | }; 283 | 284 | let with_index = build_archive(&sh, "with_index.dwarfs", ""); 285 | load(&with_index, SectionIndexStrategy::UseEmbeddedIfExists).unwrap(); 286 | load(&with_index, SectionIndexStrategy::Build).unwrap(); 287 | load(&with_index, SectionIndexStrategy::UseEmbedded).unwrap(); 288 | 289 | let no_index = build_archive(&sh, "no_index.dwarfs", "--no-section-index"); 290 | load(&no_index, SectionIndexStrategy::UseEmbeddedIfExists).unwrap(); 291 | load(&no_index, SectionIndexStrategy::Build).unwrap(); 292 | 293 | let err = load(&no_index, SectionIndexStrategy::UseEmbedded).unwrap_err(); 294 | assert_eq!(err.to_string(), "missing section SECTION_INDEX"); 295 | } 296 | 297 | #[test] 298 | fn packed_metadata() { 299 | let (sh, _temp_dir) = new_temp_shell(); 300 | let src_dir = sh.create_dir("root").unwrap(); 301 | sh.create_dir("root/foo/foo/baz").unwrap(); 302 | sh.write_file("root/foo/baz", "hello world").unwrap(); 303 | sh.write_file("root/baz", "").unwrap(); 304 | 305 | #[cfg(unix)] 306 | { 307 | use std::os::unix::fs as ufs; 308 | ufs::symlink("foo", src_dir.join("sym1")).unwrap(); 309 | ufs::symlink("foo", src_dir.join("sym2")).unwrap(); 310 | ufs::symlink("bar", src_dir.join("sym3")).unwrap(); 311 | } 312 | 313 | let pack_none = build_archive(&sh, "none.dwarfs", "--pack-metadata=none"); 314 | Archive::new(fs::File::open(pack_none).unwrap()).unwrap(); 315 | 316 | let pack_all = build_archive(&sh, "all.dwarfs", "--pack-metadata=all"); 317 | Archive::new(fs::File::open(pack_all).unwrap()).unwrap(); 318 | } 319 | 320 | #[test] 321 | fn symtab() { 322 | let (sh, _temp_dir) = new_temp_shell(); 323 | let names = (0..32) 324 | .map(|i| format!("a_very_common_prefix.{i:02}.txt")) 325 | // Single occurrence byte. 
326 | .chain(["~".to_owned()]) 327 | .collect::>(); 328 | 329 | for name in &names { 330 | sh.write_file(format!("root/{name}"), "").unwrap(); 331 | } 332 | 333 | let archive_path = build_archive(&sh, "img.dwarfs", "--pack-metadata=names,force"); 334 | let (index, _) = Archive::new(fs::File::open(archive_path).unwrap()).unwrap(); 335 | let root = index.root(); 336 | assert_eq!( 337 | root.entries().map(|ent| ent.name()).collect::>(), 338 | names, 339 | ); 340 | } 341 | 342 | #[test] 343 | fn shared_files() { 344 | let (sh, _temp_dir) = new_temp_shell(); 345 | let content = (0..1024) 346 | .map(|i| format!("{i:04}")) 347 | .collect::() 348 | .into_bytes(); 349 | sh.write_file("root/a.txt", &content).unwrap(); 350 | sh.write_file("root/b.txt", &content).unwrap(); 351 | 352 | let archive_path = build_archive(&sh, "img.dwarfs", "--pack-metadata=shared_files,force"); 353 | let (index, mut archive) = Archive::new(fs::File::open(archive_path).unwrap()).unwrap(); 354 | let root = index.root(); 355 | let a = root.get("a.txt").unwrap().inode().as_file().unwrap(); 356 | let b = root.get("b.txt").unwrap().inode().as_file().unwrap(); 357 | 358 | assert_eq!(a.as_chunks().len(), 1); 359 | assert_eq!(b.as_chunks().len(), 1); 360 | assert_eq!( 361 | a.as_chunks().next().unwrap().offset(), 362 | b.as_chunks().next().unwrap().offset(), 363 | ); 364 | 365 | assert_eq!(a.read_to_vec(&mut archive).unwrap(), content); 366 | assert_eq!(b.read_to_vec(&mut archive).unwrap(), content); 367 | } 368 | -------------------------------------------------------------------------------- /dwarfs-test/tests/large.rs: -------------------------------------------------------------------------------- 1 | //! Large tests on real production archives. 2 | use std::{ 3 | io::{BufRead, Seek, SeekFrom, Write}, 4 | sync::LazyLock, 5 | time::Instant, 6 | }; 7 | 8 | use dwarfs::{ 9 | Archive, AsChunks, 10 | metadata::{Metadata, Schema}, 11 | positioned_io::ReadAt, 12 | section::{CompressAlgo, Header, MagicVersion, SectionIndexEntry, SectionReader, SectionType}, 13 | }; 14 | use tempfile::NamedTempFile; 15 | use xshell::{Shell, cmd}; 16 | 17 | static TEST_FILES: LazyLock> = LazyLock::new(|| { 18 | std::env::var("DWARFS_LARGE_TEST_FILES") 19 | .expect("DWARFS_LARGE_TEST_FILES is not set") 20 | .split_ascii_whitespace() 21 | .map(Into::into) 22 | .collect() 23 | }); 24 | 25 | fn with_tests(mut f: impl FnMut(&str, std::fs::File)) { 26 | for path in &*TEST_FILES { 27 | eprintln!("Testing {path}"); 28 | let file = std::fs::File::open(path).unwrap(); 29 | f(path, file); 30 | } 31 | } 32 | 33 | fn read_section_by_type( 34 | rdr: &mut SectionReader, 35 | sec_index: &[SectionIndexEntry], 36 | typ: SectionType, 37 | ) -> Vec { 38 | let offset = sec_index 39 | .iter() 40 | .find_map(|i| (i.section_type() == typ).then_some(i.offset())) 41 | .expect("missing section"); 42 | let (_, bytes) = rdr 43 | .read_section_at(offset, 16 << 20) 44 | .expect("failed to read section"); 45 | bytes 46 | } 47 | 48 | /// Update the schema and metadata section of an existing DwarFS archive. 49 | fn patch_schema_and_metadata( 50 | mut orig_file: &std::fs::File, 51 | index: &[SectionIndexEntry], 52 | schema_bytes: &[u8], 53 | metadata_bytes: &[u8], 54 | ) -> NamedTempFile { 55 | // For typical archives, all non-BLOCK sections are at the end, after all BLOCK sections. 56 | let data_sections = index 57 | .iter() 58 | .position(|&ent| ent.section_type() != SectionType::BLOCK) 59 | .unwrap(); 60 | assert!( 61 | index[data_sections..] 
62 | .iter() 63 | .all(|ent| ent.section_type() != SectionType::BLOCK) 64 | ); 65 | let data_end_pos = index[data_sections].offset(); 66 | 67 | let mut patched_file = NamedTempFile::new().unwrap(); 68 | let fout = patched_file.as_file_mut(); 69 | std::io::copy(&mut orig_file, fout).unwrap(); 70 | fout.set_len(data_end_pos).unwrap(); 71 | fout.seek(SeekFrom::End(0)).unwrap(); 72 | 73 | for (i, typ, payload) in [ 74 | (0, SectionType::METADATA_V2_SCHEMA, schema_bytes), 75 | (1, SectionType::METADATA_V2, metadata_bytes), 76 | ] { 77 | write_section(fout, data_sections as u32 + i, typ, payload).unwrap(); 78 | } 79 | 80 | patched_file 81 | } 82 | 83 | fn write_section( 84 | w: &mut dyn Write, 85 | section_num: u32, 86 | typ: SectionType, 87 | payload: &[u8], 88 | ) -> std::io::Result<()> { 89 | use dwarfs::zerocopy::IntoBytes; 90 | 91 | let mut header = Header { 92 | magic_version: MagicVersion::LATEST, 93 | slow_hash: [0; 32], 94 | fast_hash: [0; 8], 95 | section_number: section_num.into(), 96 | section_type: typ, 97 | compress_algo: CompressAlgo::NONE, 98 | payload_size: 0.into(), 99 | }; 100 | header.update_size_and_checksum(payload); 101 | w.write_all(header.as_bytes())?; 102 | w.write_all(payload) 103 | } 104 | 105 | fn test_reserialize(schema_only: bool) { 106 | let sh = Shell::new().unwrap(); 107 | 108 | with_tests(|orig_path, file| { 109 | let dump1 = cmd!(sh, "dwarfsck -i {orig_path} -d metadata_full_dump") 110 | .read() 111 | .unwrap(); 112 | 113 | let file_size = file.metadata().expect("failed to get file size").len(); 114 | let mut rdr = SectionReader::new(file); 115 | let (_, sec_index) = rdr 116 | .read_section_index(file_size, 16 << 20) 117 | .expect("failed to read section index") 118 | .expect("missing section index"); 119 | let mut schema_bytes = 120 | read_section_by_type(&mut rdr, &sec_index, SectionType::METADATA_V2_SCHEMA); 121 | let mut metadata_bytes = 122 | read_section_by_type(&mut rdr, &sec_index, SectionType::METADATA_V2); 123 | let schema = Schema::parse(&schema_bytes).expect("failed to parse schema"); 124 | 125 | if schema_only { 126 | let schema_ser = schema.to_bytes().unwrap(); 127 | let schema2 = Schema::parse(&schema_ser).unwrap(); 128 | assert_eq!(schema, schema2); 129 | schema_bytes = schema_ser; 130 | } else { 131 | let metadata = Metadata::parse(&schema, &metadata_bytes).unwrap(); 132 | let (schema2, metadata_ser) = metadata.to_schema_and_bytes().unwrap(); 133 | let metadata2 = Metadata::parse(&schema2, &metadata_ser).unwrap(); 134 | assert_eq!(metadata, metadata2); 135 | let schema_ser = schema2.to_bytes().unwrap(); 136 | (schema_bytes, metadata_bytes) = (schema_ser, metadata_ser); 137 | } 138 | 139 | let patched_file = 140 | patch_schema_and_metadata(rdr.get_ref(), &sec_index, &schema_bytes, &metadata_bytes); 141 | let patched_path = patched_file.path(); 142 | let dump2 = cmd!(sh, "dwarfsck -i {patched_path} -d metadata_full_dump") 143 | .read() 144 | .unwrap(); 145 | if dump1 != dump2 { 146 | std::fs::write("./result-metadata-dump-before.txt", &dump1).unwrap(); 147 | std::fs::write("./result-metadata-dump-after.txt", &dump2).unwrap(); 148 | panic!("metadata dump differs, results saved to result-metadata-dump-*.txt"); 149 | } 150 | }); 151 | } 152 | 153 | #[test] 154 | #[ignore = "large test"] 155 | fn schema_roundtrip() { 156 | test_reserialize(true); 157 | } 158 | 159 | #[test] 160 | #[ignore = "large test"] 161 | fn metadata_roundtrip() { 162 | test_reserialize(false); 163 | } 164 | 165 | #[test] 166 | #[ignore = "large test"] 167 | fn dump_mtree() { 
168 | let sh = Shell::new().unwrap(); 169 | with_tests(|path, file| { 170 | let expect = cmd!(sh, "dwarfsextract -i {path} -f mtree --log-level=error") 171 | .read() 172 | .unwrap(); 173 | let expect = expect.trim_ascii_end(); 174 | 175 | let mut got = Vec::new(); 176 | let (index, _archive) = Archive::new(file).unwrap(); 177 | dwarfs_test::mtree::dump(&mut got, &index).unwrap(); 178 | let actual = str::from_utf8(&got).unwrap().trim_ascii_end(); 179 | 180 | if actual != expect { 181 | std::fs::write("result-actual.mtree", actual).unwrap(); 182 | std::fs::write("result-expect.mtree", expect).unwrap(); 183 | panic!("mtree mismatch"); 184 | } 185 | }); 186 | } 187 | 188 | #[test] 189 | #[ignore = "large test"] 190 | fn dump_content() { 191 | use sha2::{Digest, Sha512_256}; 192 | assert!( 193 | !cfg!(debug_assertions), 194 | "requires '--release' or it will be too slow", 195 | ); 196 | 197 | let sh = Shell::new().unwrap(); 198 | with_tests(|archive_path, archive_file| { 199 | let inst = Instant::now(); 200 | let output = cmd!( 201 | sh, 202 | "dwarfsck --checksum=sha512-256 -i {archive_path} --log-level=error" 203 | ) 204 | .read() 205 | .unwrap(); 206 | eprintln!("dwarfsck completes in {:?}", inst.elapsed()); 207 | 208 | let mut expect = output 209 | .lines() 210 | .map(|line| line.split_once(" ").unwrap()) 211 | .collect::<Vec<_>>(); 212 | expect.sort_unstable_by_key(|(_, name)| *name); 213 | let expect = expect 214 | .iter() 215 | .flat_map(|(hash, path)| [hash, " ", path, "\n"]) 216 | .collect::<String>(); 217 | 218 | let inst = Instant::now(); 219 | let (index, mut archive) = Archive::new(archive_file).unwrap(); 220 | let mut actual = Vec::with_capacity(index.inodes().len()); 221 | let mut h = Sha512_256::new(); 222 | let files = dwarfs_test::traverse::traverse_files(&index); 223 | eprintln!("traversal completes in {:?}", inst.elapsed()); 224 | for (path, file) in files { 225 | let mut rdr = file.as_reader(&mut archive); 226 | loop { 227 | let buf = rdr.fill_buf().unwrap(); 228 | if buf.is_empty() { 229 | break; 230 | } 231 | h.update(buf); 232 | let len = buf.len(); 233 | rdr.consume(len); 234 | } 235 | let digest = hex::encode(h.finalize_reset().as_slice()); 236 | actual.push((digest, path)); 237 | } 238 | actual.sort_unstable_by(|(_, lhs), (_, rhs)| Ord::cmp(lhs, rhs)); 239 | let actual = actual 240 | .iter() 241 | // Exclude leading `/`. 242 | .flat_map(|(hash, path)| [hash, " ", &path[1..], "\n"]) 243 | .collect::<String>(); 244 | eprintln!("traversal+checksum completes in {:?}", inst.elapsed()); 245 | 246 | if actual != expect { 247 | std::fs::write("result-actual.cksum", actual).unwrap(); 248 | std::fs::write("result-expect.cksum", expect).unwrap(); 249 | panic!("results mismatch") 250 | } 251 | }); 252 | } 253 | -------------------------------------------------------------------------------- /dwarfs/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/) 6 | and this project adheres to [Semantic Versioning](https://semver.org/). 7 | 8 | ## v0.2.1 9 | 10 | ### Changed 11 | 12 | ### Added 13 | 14 | - FSST symtab serialization `fsst::to_bytes`. 15 | 16 | - DwarFS metadata serialization `Metadata::to_schema_and_bytes`. 17 | 18 | This implements basic serialization support for the Frozen format. It uses fixed-width 19 | integers and does not yet support bit-packing.
20 | 21 | - The previously missing `Metadata::reg_file_size_cache` field. 22 | 23 | - `section::Header::update_size_and_checksum` 24 | 25 | - `section::MagicVersion::LATEST` 26 | 27 | ### Others 28 | 29 | - Switch from `xz2` to `liblzma` crate for LZMA decompression. 30 | 31 | - Remove the unused high-level wrapper crate `zstd` and use `zstd-safe` directly. 32 | 33 | - Add more tests. 34 | 35 | ## v0.2.0 36 | 37 | ### Changed 38 | 39 | - `metadata::Schema::to_bytes` is now gated under a disabled-by-default 40 | feature `serialize`. 41 | 42 | - The `fsst` module is refactored. Fallible methods of `fsst::Decoder` now return 43 | `Result<_, fsst::Error>` instead of `Option<_>`. 44 | 45 | `Decoder::parse_symtab` has been renamed to `parse` for consistency. 46 | 47 | ### Added 48 | 49 | - Re-export of dependency `zerocopy`. 50 | - `section::Header::calculate_{fast,slow}_checksum`. 51 | 52 | ### Fixed 53 | 54 | - A bug causing any valid section index to be rejected. 55 | 56 | - False errors when loading empty archives. 57 | 58 | - Incorrect behavior of `Dir::get`. 59 | 60 | - An off-by-one bug when unpacking string tables. 61 | 62 | ### Others 63 | 64 | - Added more tests. 65 | 66 | ## v0.1.0 67 | 68 | Initial release. 69 | -------------------------------------------------------------------------------- /dwarfs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dwarfs" 3 | version = "0.2.1" 4 | edition = "2024" 5 | description = "A library for reading DwarFS archives (aka. DwarFS images)" 6 | license = "MIT OR Apache-2.0" 7 | keywords = ["dwarfs", "archive", "compression"] 8 | categories = ["compression", "filesystem"] 9 | repository = "https://github.com/oxalica/dwarfs-rs" 10 | 11 | [features] 12 | default = ["zstd", "log"] 13 | 14 | # Compression algorithm support. 15 | zstd = ["dep:zstd-safe"] 16 | lzma = ["dep:liblzma"] 17 | lz4 = ["dep:lz4"] 18 | 19 | # Extra functionalities.
20 | log = ["dep:log", "dep:measure_time"] 21 | serialize = ["dep:indexmap"] 22 | 23 | [dependencies] 24 | bstr = { version = "1.12.0", features = ["serde"] } 25 | indexmap = { version = "2.9.0", optional = true } 26 | log = { version = "0.4.27", optional = true } 27 | lru = "0.14.0" 28 | lz4 = { version = "1.28.1", optional = true } 29 | measure_time = { version = "0.9.0", optional = true } 30 | positioned-io = { version = "0.3.4", default-features = false } 31 | serde = { version = "1.0.219", features = ["derive"] } 32 | sha2 = "0.10.9" 33 | xxhash-rust = { version = "0.8.15", features = ["xxh3"] } 34 | liblzma = { version = "0.4.1", optional = true } 35 | zerocopy = { version = "0.8.25", features = ["derive", "std"] } 36 | zstd-safe = { version = "7.2.4", optional = true, default-features = false } 37 | 38 | [lints.clippy] 39 | dbg-macro = "warn" 40 | todo = "warn" 41 | print-stdout = "warn" 42 | print-stderr = "warn" 43 | -------------------------------------------------------------------------------- /dwarfs/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /dwarfs/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /dwarfs/README.md: -------------------------------------------------------------------------------- 1 | # dwarfs 2 | 3 | [![crates.io](https://img.shields.io/crates/v/dwarfs)](https://crates.io/crates/dwarfs) 4 | [![docs.rs](https://img.shields.io/docsrs/dwarfs)][docs] 5 | 6 | A library for reading [DwarFS][dwarfs] archives (aka. DwarFS images). 7 | 8 | See the [documentation][docs] for more details. 9 | 10 | [dwarfs]: https://github.com/mhx/dwarfs 11 | [docs]: https://docs.rs/dwarfs 12 | 13 | #### License 14 | 15 | 16 | Licensed under either of Apache License, Version 17 | 2.0 or MIT license at your option. 18 | 19 |
21 | 22 | 23 | Unless you explicitly state otherwise, any contribution intentionally submitted 24 | for inclusion in this crate by you, as defined in the Apache-2.0 license, shall 25 | be dual licensed as above, without any additional terms or conditions. 26 | 27 | -------------------------------------------------------------------------------- /dwarfs/src/fsst.rs: -------------------------------------------------------------------------------- 1 | //! The [Fast Static Symbol Table (FSST)][fsst] decoder for compressed string 2 | //! tables [`StringTable::symtab`][crate::metadata::StringTable::symtab]. 3 | //! 4 | //! [fsst]: https://github.com/cwida/fsst 5 | 6 | use std::fmt; 7 | 8 | use bstr::{BStr, BString}; 9 | use zerocopy::IntoBytes; 10 | 11 | type Sym = u64; 12 | 13 | const VERSION: u32 = 2019_0218; 14 | const SYM_CORRUPT: Sym = u64::from_ne_bytes(*b"corrupt\0"); 15 | 16 | /// The max length of one symbol. 17 | pub const MAX_SYMBOL_LEN: usize = 8; 18 | 19 | type Result<T, E = Error> = std::result::Result<T, E>; 20 | 21 | /// A symbol table decoding error. 22 | pub struct Error(ErrorInner); 23 | 24 | #[derive(Debug)] 25 | enum ErrorInner { 26 | InputEof, 27 | InvalidMagic, 28 | NulMode, 29 | CodeOverflow, 30 | 31 | BufTooSmall, 32 | InvalidEscape, 33 | InvalidSymbol, 34 | 35 | #[cfg(feature = "serialize")] 36 | IncorrectSymbolOrder, 37 | } 38 | 39 | impl fmt::Debug for Error { 40 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 41 | self.0.fmt(f) 42 | } 43 | } 44 | 45 | impl fmt::Display for Error { 46 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 47 | f.pad(match self.0 { 48 | ErrorInner::InputEof => "unexpected end of input", 49 | ErrorInner::InvalidMagic => "missing header magic", 50 | ErrorInner::NulMode => "unsupported null-terminated mode", 51 | ErrorInner::CodeOverflow => "too many symbols", 52 | ErrorInner::BufTooSmall => "output buffer is too small", 53 | ErrorInner::InvalidEscape => "invalid escape byte at the end of input", 54 | ErrorInner::InvalidSymbol => "invalid symbol", 55 | #[cfg(feature = "serialize")] 56 | ErrorInner::IncorrectSymbolOrder => "symbols must be ordered in length 2,3,4,5,6,7,8,1", 57 | }) 58 | } 59 | } 60 | 61 | impl std::error::Error for Error {} 62 | 63 | impl From<ErrorInner> for Error { 64 | #[cold] 65 | #[inline] 66 | fn from(err: ErrorInner) -> Self { 67 | Self(err) 68 | } 69 | } 70 | 71 | /// The Fast Static Symbol Table (FSST) decoder. 72 | /// 73 | /// See the [module level documentation](self). Note that this struct contains a 74 | /// ~2KiB array, and you may want to box it to keep moves cheap. 75 | pub struct Decoder { 76 | /// Code -> symbol mapping, stored in native-endian, with trailing bytes filled by NUL. 77 | symbols: [Sym; 255], 78 | } 79 | 80 | impl fmt::Debug for Decoder { 81 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 82 | struct SymbolMap<'a>(&'a Decoder); 83 | 84 | impl fmt::Debug for SymbolMap<'_> { 85 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 86 | f.debug_map() 87 | .entries(self.0.symbols.iter().enumerate().map(|(i, sym)| { 88 | let len = Decoder::symbol_len(*sym); 89 | let sym = &sym.as_bytes()[..len]; 90 | (i, BStr::new(sym)) 91 | })) 92 | .finish() 93 | } 94 | } 95 | 96 | f.debug_struct("Decoder") 97 | .field("symbols", &SymbolMap(self)) 98 | .finish() 99 | } 100 | } 101 | 102 | impl Decoder { 103 | /// The max length of one symbol.
104 | pub const MAX_SYMBOL_LEN: usize = MAX_SYMBOL_LEN; 105 | 106 | const ALL_CORRUPT: Self = Decoder { 107 | symbols: [SYM_CORRUPT; 255], 108 | }; 109 | 110 | /// Iterate over `len_histo` for (symbol_length, count). 111 | fn len_histo_iter(histo: &[u8; 8]) -> impl Iterator<Item = (usize, u8)> { 112 | // Semantically: zip([2,3,4,5,6,7,8,1], histo[...[1,2,3,4,5,6,7,0]]) 113 | (1..=8).map(|i| ((i & 7) + 1, histo[i & 7])) 114 | } 115 | 116 | /// Parse the symbol table `symtab`, from libfsst's serialization format. 117 | /// 118 | /// This is re-implemented in Rust with reference to 119 | /// [libfsst's `fsst_import`](https://github.com/cwida/fsst/blob/b228af6356196095eaf9f8f5654b0635f969661e/libfsst.cpp#L555). 120 | /// 121 | /// Some notable differences: 122 | /// - More error checking on short inputs, no buffer overflow, because we're Rust. 123 | /// - More permissive on version endianness. Allow both little and big endian versions. 124 | /// Symbols are still always little-endian, as upstream. 125 | /// - Zero-terminated mode (NUL as the first symbol) is unsupported and rejected. 126 | /// - Encoder state bytes are ignored. 127 | /// - Trailing bytes are allowed but ignored. 128 | /// 129 | /// License of libfsst: MIT License, Copyright 2018-2020, CWI, TU Munich, FSU Jena 130 | /// 131 | /// # Errors 132 | /// 133 | /// Returns `Err` if the input cannot be successfully parsed. 134 | pub fn parse(bytes: &[u8]) -> Result<Self> { 135 | let mut this = Self::ALL_CORRUPT; 136 | 137 | let (&version_bytes, rest) = bytes.split_first_chunk::<8>().ok_or(ErrorInner::InputEof)?; 138 | let (&zero_terminated, rest) = rest.split_first().ok_or(ErrorInner::InputEof)?; 139 | let (&len_histo, rest) = rest.split_first_chunk::<8>().ok_or(ErrorInner::InputEof)?; 140 | 141 | // FIXME: This is in native endian, thus is non-portable and non-deterministic! 142 | // Here we use little-endian first, detect and fix the endianness by 143 | // using the fact the most-significant byte is always zero while the 144 | // least-significant byte is always non-zero. 145 | // Need further discussion with upstream. 146 | let mut version = u64::from_le_bytes(version_bytes); 147 | #[allow(clippy::verbose_bit_mask, reason = "less clear")] 148 | if version & 0xFF == 0 { 149 | version = version.swap_bytes(); 150 | } 151 | if version >> 32 != u64::from(VERSION) { 152 | return Err(ErrorInner::InvalidMagic.into()); 153 | } 154 | 155 | // Zero terminated flag is not supported. 156 | if zero_terminated & 1 != 0 { 157 | return Err(ErrorInner::NulMode.into()); 158 | } 159 | 160 | let mut code = 0; 161 | let mut pos = 0; 162 | for (sym_len, cnt) in Self::len_histo_iter(&len_histo) { 163 | for _ in 0..cnt { 164 | let mut sym = 0u64; 165 | // TODO: Bound check before? 166 | sym.as_mut_bytes()[..sym_len] 167 | .copy_from_slice(rest.get(pos..pos + sym_len).ok_or(ErrorInner::InputEof)?); 168 | *this.symbols.get_mut(code).ok_or(ErrorInner::CodeOverflow)? = sym; 169 | pos += sym_len; 170 | code += 1; 171 | } 172 | } 173 | 174 | Ok(this) 175 | } 176 | 177 | /// Return the max possible decoded length for an input of length `input_len`. 178 | #[inline] 179 | #[must_use] 180 | pub fn max_decode_len(input_len: usize) -> usize { 181 | // `usize::MAX` on overflow will guarantee an OOM on allocation.
input_len.checked_mul(8).unwrap_or(usize::MAX) 183 | } 184 | 185 | #[inline] 186 | fn symbol_len(sym: Sym) -> usize { 187 | if cfg!(target_endian = "little") { 188 | 8 - sym.leading_zeros() as usize / 8 189 | } else { 190 | 8 - sym.trailing_zeros() as usize / 8 191 | } 192 | } 193 | 194 | /// Decode `input` into `output` and return the decoded length. 195 | /// 196 | /// # Errors 197 | /// 198 | /// If `output.len() < Self::max_decode_len(input.len())`, or an error occurs 199 | /// during decoding, `Err` is returned. 200 | #[allow(clippy::missing_panics_doc, reason = "never panics")] 201 | pub fn decode_into(&self, input: &[u8], mut output: &mut [u8]) -> Result<usize> { 202 | if input.is_empty() { 203 | return Ok(0); 204 | } 205 | if output.len() < Self::max_decode_len(input.len()) { 206 | return Err(ErrorInner::BufTooSmall.into()); 207 | } 208 | if input.last() == Some(&0xFF) { 209 | return Err(ErrorInner::InvalidEscape.into()); 210 | } 211 | 212 | let prev_output_len = output.len(); 213 | let mut i = 0; 214 | // The second condition is a loop invariant, not an exit condition. 215 | while i < input.len() && output.len() >= MAX_SYMBOL_LEN { 216 | let b = input[i]; 217 | if b < 0xFF { 218 | let sym = self.symbols[b as usize]; 219 | if sym == 0 { 220 | return Err(ErrorInner::InvalidSymbol.into()); 221 | } 222 | // We always use max possible decode length, so output[..8] will never fail. 223 | *output.first_chunk_mut().expect("loop invariant") = sym.to_ne_bytes(); 224 | output = &mut output[Self::symbol_len(sym)..]; 225 | // This condition is always true due to the initial check, 226 | // but is here for better codegen. 227 | } else if i + 1 < input.len() { 228 | i += 1; 229 | output[0] = input[i]; 230 | output = &mut output[1..]; 231 | } 232 | i += 1; 233 | } 234 | Ok(prev_output_len - output.len()) 235 | } 236 | 237 | /// Decode `input` into an owned byte string. 238 | /// 239 | /// # Errors 240 | /// 241 | /// If an error occurs during decoding, `Err` is returned. 242 | pub fn decode(&self, input: &[u8]) -> Result<BString> { 243 | let mut buf = vec![0u8; Self::max_decode_len(input.len())]; 244 | let len = self.decode_into(input, &mut buf)?; 245 | buf.truncate(len); 246 | Ok(buf.into()) 247 | } 248 | } 249 | 250 | /// Serialize a symbol table consisting of the given symbols into bytes. 251 | /// 252 | /// `symbols` is an iterator of FSST symbols for code `0..`. It must be ordered 253 | /// in length `2,3,4,5,6,7,8,1`. 254 | /// 255 | /// # Errors 256 | /// 257 | /// Returns `Err` if: 258 | /// - `symbols` has more than 255 elements, or the symbols are not in the expected order. 259 | /// - A symbol has length outside range `1..=8`. 260 | /// - A symbol contains a zero (NUL) byte.
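/// /// # Example /// /// A small round-trip sketch, mirroring the `serialize` test below (the symbols are already in the required length order): /// /// ``` /// use dwarfs::fsst::{Decoder, to_bytes}; /// /// let bytes = to_bytes([&b"hello"[..], b"world", b"!"]).unwrap(); /// let dec = Decoder::parse(&bytes).unwrap(); /// assert_eq!(dec.decode(b"\0\xFF,\x01\x02").unwrap(), "hello,world!"); /// ```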
261 | #[cfg(feature = "serialize")] 262 | pub fn to_bytes<I>(symbols: I) -> Result<Vec<u8>> 263 | where 264 | I: IntoIterator, 265 | I::Item: AsRef<[u8]>, 266 | { 267 | let mut tbl = [0u64; 255]; 268 | let mut len_histo = [0u8; 8]; 269 | let mut prev_len_order = 0usize; 270 | let mut code = 0usize; 271 | for bytes in symbols { 272 | if code >= 0xFF { 273 | return Err(ErrorInner::CodeOverflow.into()); 274 | } 275 | let bytes = bytes.as_ref(); 276 | let len = bytes.len(); 277 | if !(1..=8).contains(&len) || bytes.contains(&0) { 278 | return Err(ErrorInner::InvalidSymbol.into()); 279 | } 280 | // 23456781 => 0123456MAX 281 | let len_order = len.wrapping_sub(2); 282 | if prev_len_order > len_order { 283 | return Err(ErrorInner::IncorrectSymbolOrder.into()); 284 | } 285 | prev_len_order = len_order; 286 | 287 | let mut sym = 0u64; 288 | sym.as_mut_bytes()[..len].copy_from_slice(bytes); 289 | tbl[code] = sym; 290 | code += 1; 291 | len_histo[len - 1] += 1; 292 | } 293 | 294 | let mut out = Vec::with_capacity(8 + 1 + 8 + MAX_SYMBOL_LEN * 255); 295 | // Magic bytes, with no parameters set. 296 | let magic = u64::from(VERSION) << 32 | 0xFF; 297 | out.extend_from_slice(&magic.to_le_bytes()); 298 | // Disable `zero_terminated` mode. 299 | out.push(0x00); 300 | // Lengths. 301 | out.extend_from_slice(&len_histo); 302 | 303 | for sym in &tbl[..code] { 304 | let len = Decoder::symbol_len(*sym); 305 | out.extend_from_slice(&sym.as_bytes()[..len]); 306 | } 307 | 308 | Ok(out) 309 | } 310 | 311 | #[cfg(test)] 312 | mod tests { 313 | use super::*; 314 | 315 | #[test] 316 | #[allow(clippy::print_stderr)] 317 | fn smoke() { 318 | let tbl = Decoder { 319 | symbols: [u64::from_ne_bytes(*b"hello\0\0\0"); 255], 320 | }; 321 | let debug = format!("{tbl:#?}"); 322 | eprintln!("{debug}"); 323 | assert!(debug.contains(r#"42: "hello","#)); 324 | 325 | assert_eq!(tbl.decode(b"").unwrap(), ""); 326 | assert_eq!( 327 | tbl.decode(b"\xFF").unwrap_err().to_string(), 328 | "invalid escape byte at the end of input", 329 | ); 330 | assert_eq!( 331 | tbl.decode_into(b"\0", &mut [0u8; 4]) 332 | .unwrap_err() 333 | .to_string(), 334 | "output buffer is too small", 335 | ); 336 | 337 | let got = tbl.decode(b"\0\xFF,\0").unwrap(); 338 | assert_eq!(got, "hello,hello"); 339 | } 340 | 341 | #[test] 342 | #[cfg(feature = "serialize")] 343 | fn serialize() { 344 | let bytes = to_bytes([&b"hello"[..], b"world", b"!"]).unwrap(); 345 | let tbl = Decoder::parse(&bytes).unwrap(); 346 | assert_eq!(tbl.decode(b"\0\xFF,\x01\x02").unwrap(), "hello,world!"); 347 | 348 | assert_eq!( 349 | to_bytes([&b"!"[..], b"hello"]).unwrap_err().to_string(), 350 | "symbols must be ordered in length 2,3,4,5,6,7,8,1", 351 | ); 352 | assert_eq!( 353 | to_bytes([b"123456789"]).unwrap_err().to_string(), 354 | "invalid symbol", 355 | ); 356 | assert_eq!( 357 | to_bytes(&[b"a"].repeat(256)).unwrap_err().to_string(), 358 | "too many symbols", 359 | ); 360 | } 361 | } 362 | -------------------------------------------------------------------------------- /dwarfs/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A library for reading [DwarFS][dwarfs] archives (aka. images). 2 | //! 3 | //! Currently, DwarFS filesystem versions 2.3..=2.5 are supported, 4 | //! which should be compatible with files generated by 5 | //! [upstream `mkdwarfs`][dwarfs] v0.5.0..=v0.12.4 (latest at the time of 6 | //! writing). Other versions may also be readable but are not guaranteed. 7 | //! 8 | //! [dwarfs]: https://github.com/mhx/dwarfs 9 | //!
10 | //! ``` 11 | //! use dwarfs::{Archive, ArchiveIndex, AsChunks}; 12 | //! use std::fs::File; 13 | //! 14 | //! # fn wrap() -> dwarfs::Result<()> { 15 | //! // Open an archive file and load its metadata. 16 | //! let file = File::open("./my.dwarfs")?; 17 | //! let (index, mut archive) = Archive::new(file)?; 18 | //! 19 | //! // Hierarchy traversal. 20 | //! for entry in index.root().entries() { 21 | //! let inode = entry.inode(); 22 | //! println!("/{} mode={}", entry.name(), inode.metadata().file_type_mode()); 23 | //! if let Some(deep) = inode.as_dir() { 24 | //! for entry in deep.entries() { 25 | //! // ... 26 | //! } 27 | //! } 28 | //! } 29 | //! 30 | //! // Resolve paths. 31 | //! let file: dwarfs::File = index.get_path(["src", "Cargo.toml"]) 32 | //! .expect("does not exist") 33 | //! .as_file() 34 | //! .expect("not a file"); 35 | //! // The simple way to read content. 36 | //! let bytes: Vec<u8> = file.read_to_vec(&mut archive)?; 37 | //! 38 | //! # Ok(()) } 39 | //! ``` 40 | //! 41 | //! ## Cargo features 42 | //! 43 | //! - `zstd`, `lzma`, `lz4` *(Only `zstd` is enabled by default)* 44 | //! 45 | //! Enable relevant decompression algorithm support. `zstd` is the default 46 | //! compression algorithm `mkdwarfs` uses and it should be enough for most cases. 47 | //! 48 | //! - `log` *(Enabled by default)* 49 | //! 50 | //! Enable trace-level logging and time measurement for internal events via 51 | //! the [`log` crate][log]. Useful for profiling or debugging. Should not 52 | //! have a performance penalty unless trace-level logging is enabled. 53 | //! 54 | //! - `serialize` *(Disabled by default)* 55 | //! 56 | //! Enable serialization support for various structures. It enables: 57 | //! - [`metadata::Schema::to_bytes`] 58 | //! - [`metadata::Metadata::to_schema_and_bytes`] 59 | //! - [`fsst::to_bytes`] 60 | //! 61 | //! [log]: https://crates.io/crates/log 62 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 63 | #![forbid(unsafe_code)] 64 | #![warn(missing_debug_implementations)] 65 | #![warn(missing_docs)] 66 | 67 | #[cfg(feature = "log")] 68 | #[macro_use(trace_time)] 69 | extern crate measure_time; 70 | 71 | #[cfg(feature = "log")] 72 | #[macro_use(trace)] 73 | extern crate log; 74 | 75 | #[cfg(not(feature = "log"))] 76 | #[macro_use] 77 | mod macros { 78 | macro_rules! trace { 79 | ($($tt:tt)*) => { 80 | let _ = if false { 81 | let _ = ::std::format_args!($($tt)*); 82 | }; 83 | }; 84 | } 85 | 86 | macro_rules! trace_time { 87 | ($($tt:tt)*) => { 88 | trace!($($tt)*) 89 | }; 90 | } 91 | } 92 | 93 | macro_rules! bail { 94 | ($err:expr $(,)?) => { 95 | return Err(Into::into($err)) 96 | }; 97 | } 98 | 99 | pub mod archive; 100 | pub mod fsst; 101 | pub mod metadata; 102 | pub mod section; 103 | 104 | pub extern crate positioned_io; 105 | pub extern crate zerocopy; 106 | 107 | /// The range of filesystem version tuple `(major, minor)` supported by this library. 108 | /// 109 | /// Currently this is `(2, 3)..=(2, 5)`. 110 | // TODO: We could lower this. 111 | pub const SUPPORTED_VERSION_RANGE: std::ops::RangeInclusive<(u8, u8)> = (2, 3)..=(2, 5); 112 | 113 | #[doc(inline)] 114 | pub use archive::{ 115 | Archive, ArchiveIndex, AsChunks, Device, Dir, DirEntry, Error, File, Inode, InodeKind, 116 | InodeMetadata, Ipc, Result, Symlink, 117 | }; 118 | -------------------------------------------------------------------------------- /dwarfs/src/metadata.rs: -------------------------------------------------------------------------------- 1 | //! The low-level metadata structures and parsers.
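//! //! A typical decode flow, assuming `schema_payload` and `metadata_payload` (hypothetical names, for illustration) hold the already-read and decompressed payloads of the two metadata sections: //! //! ```ignore //! use dwarfs::metadata::{Metadata, Schema}; //! let schema = Schema::parse(&schema_payload)?; //! let metadata = Metadata::parse(&schema, &metadata_payload)?; //! ```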
2 | //! 3 | //! The parsed [`Metadata`] and [`Schema`] are given as-is from the underlying 4 | //! structure without additional modification. Notably, for `Metadata`, no 5 | //! unpacking is performed, no value validation is performed, and only binary 6 | //! syntax and structure are validated. 7 | //! 8 | //! For high-level access of the image hierarchy and content, use 9 | //! [`Archive`][crate::Archive] instead. 10 | //! 11 | //! See the upstream documentation for the meaning of structs and fields: 12 | //! 13 | //! - Metadata definition: 14 | //! 15 | //! - Frozen schema definition: 16 | //! 17 | //! Typically, users should treat [`Schema`] as an opaque type, because the 18 | //! definition in this crate is specialized only for [`Metadata::parse`]. 19 | use std::{borrow::Borrow, fmt, marker::PhantomData, ops}; 20 | 21 | use bstr::BString; 22 | use serde::{Deserialize, Serialize, de}; 23 | 24 | mod de_frozen; 25 | mod de_thrift; 26 | 27 | #[cfg(feature = "serialize")] 28 | mod ser_frozen; 29 | #[cfg(feature = "serialize")] 30 | mod ser_thrift; 31 | 32 | #[cfg(test)] 33 | mod tests; 34 | 35 | type Result<T, E = Error> = std::result::Result<T, E>; 36 | 37 | /// An error raised from parsing schema or metadata. 38 | #[derive(Debug)] 39 | pub struct Error(Box<str>); 40 | 41 | impl fmt::Display for Error { 42 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 43 | self.0.fmt(f) 44 | } 45 | } 46 | 47 | impl std::error::Error for Error {} 48 | 49 | /// A dense map of i16 -> T, stored as `Vec<Option<T>>` for quick indexing. 50 | #[derive(Default, Clone, PartialEq, Eq, Hash)] 51 | pub struct DenseMap<T>(pub Vec<Option<T>>); 52 | 53 | impl<T: fmt::Debug> fmt::Debug for DenseMap<T> { 54 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 55 | f.debug_map().entries(self.iter()).finish() 56 | } 57 | } 58 | 59 | impl<'de, T: de::Deserialize<'de>> de::Deserialize<'de> for DenseMap<T> { 60 | fn deserialize<D: de::Deserializer<'de>>(de: D) -> Result<Self, D::Error> { 61 | struct Visitor<T>(PhantomData<T>); 62 | 63 | impl<'de, T: de::Deserialize<'de>> de::Visitor<'de> for Visitor<T> { 64 | type Value = DenseMap<T>; 65 | 66 | fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { 67 | f.write_str("a dense map") 68 | } 69 | 70 | fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> 71 | where 72 | A: de::MapAccess<'de>, 73 | { 74 | // Keys start at 1. 75 | let len = map.size_hint().unwrap_or(0) + 1; 76 | let mut vecmap = Vec::with_capacity(len); 77 | while let Some((k, v)) = map.next_entry::<i16, T>()? { 78 | let k = usize::try_from(k).map_err(|_| { 79 | de::Error::invalid_value( 80 | de::Unexpected::Signed(k.into()), 81 | &"an unsigned dense map key", 82 | ) 83 | })?; 84 | if vecmap.len() <= k { 85 | vecmap.resize_with(k + 1, || None); 86 | } 87 | vecmap[k] = Some(v); 88 | } 89 | Ok(DenseMap(vecmap)) 90 | } 91 | } 92 | 93 | de.deserialize_map(Visitor::<T>(PhantomData)) 94 | } 95 | } 96 | 97 | impl<T: Serialize> Serialize for DenseMap<T> { 98 | fn serialize<S>(&self, ser: S) -> Result<S::Ok, S::Error> 99 | where 100 | S: serde::Serializer, 101 | { 102 | use serde::ser::SerializeMap; 103 | 104 | let size = self.iter().count(); 105 | let mut ser = ser.serialize_map(Some(size))?; 106 | for (k, v) in self.iter() { 107 | ser.serialize_entry(&k, v)?; 108 | } 109 | ser.end() 110 | } 111 | } 112 | 113 | impl<T> ops::Index<i16> for DenseMap<T> { 114 | type Output = T; 115 | 116 | fn index(&self, index: i16) -> &Self::Output { 117 | self.get(index).expect("index out of bounds") 118 | } 119 | } 120 | 121 | impl<T> DenseMap<T> { 122 | fn is_empty(&self) -> bool { 123 | self.0.is_empty() 124 | } 125 | 126 | fn get(&self, i: i16) -> Option<&T> { 127 | self.0.get(usize::try_from(i).ok()?)?.as_ref() 128 | } 129 | 130 | fn iter(&self) -> impl Iterator<Item = (i16, &T)> + use<'_, T> { 131 | self.0 132 | .iter() 133 | .enumerate() 134 | .filter_map(|(k, v)| Some((k as i16, v.as_ref()?))) 135 | } 136 | } 137 | 138 | /// The Frozen schema. You should treat this type as opaque. 139 | /// 140 | /// See [module level documentation][self] for details. 141 | #[expect(missing_docs, reason = "users should check upstream docs")] 142 | #[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize, Serialize)] 143 | #[non_exhaustive] 144 | pub struct Schema { 145 | // NB. Field order matters for ser/de impl. 146 | #[serde(default, skip_serializing_if = "is_default")] 147 | pub relax_type_checks: bool, 148 | pub layouts: DenseMap<SchemaLayout>, 149 | #[serde(default, skip_serializing_if = "is_default")] 150 | pub root_layout: i16, 151 | #[serde(default, skip_serializing_if = "is_default")] 152 | pub file_version: i32, 153 | } 154 | 155 | /// You should treat this type as opaque. 156 | /// 157 | /// See [module level documentation][self] for details. 158 | #[expect(missing_docs, reason = "users should check upstream docs")] 159 | #[derive(Debug, Default, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] 160 | #[non_exhaustive] 161 | pub struct SchemaLayout { 162 | // NB. Field order matters for ser/de impl. 163 | #[serde(default, skip_serializing_if = "is_default")] 164 | pub size: i32, 165 | #[serde(default, skip_serializing_if = "is_default")] 166 | pub bits: i16, 167 | pub fields: DenseMap<SchemaField>, 168 | pub type_name: String, 169 | } 170 | 171 | fn is_default<T: Default + PartialEq>(v: &T) -> bool { 172 | *v == T::default() 173 | } 174 | 175 | /// You should treat this type as opaque. 176 | /// 177 | /// See [module level documentation][self] for details. 178 | #[expect(missing_docs, reason = "users should check upstream docs")] 179 | #[derive(Debug, Default, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] 180 | #[non_exhaustive] 181 | pub struct SchemaField { 182 | // NB. Field order matters for ser/de impl.
pub layout_id: i16, 184 | #[serde(default, skip_serializing_if = "is_default")] 185 | pub offset: i16, 186 | } 187 | 188 | impl SchemaField { 189 | fn offset_bits(&self) -> u16 { 190 | let o = self.offset; 191 | if o >= 0 { o as u16 * 8 } else { (-o) as u16 } 192 | } 193 | } 194 | 195 | impl Schema { 196 | /// Parse the schema from the on-disk serialized form 197 | /// ([`SectionType::METADATA_V2_SCHEMA`](crate::section::SectionType::METADATA_V2_SCHEMA)). 198 | /// 199 | /// The schema type and parser are specialized for [`Metadata::parse`]. It 200 | /// should not be used for Frozen schemas of other data structures. 201 | /// 202 | /// # Errors 203 | /// 204 | /// Returns `Err` if it fails to parse the input, or if the parsed result fails 205 | /// basic invariant validation. Currently only index ranges are checked; 206 | /// the validated invariants may change in the future. 207 | pub fn parse(input: &[u8]) -> Result<Self> { 208 | let this = de_thrift::deserialize_struct::<Self>(input) 209 | .map_err(|err| Error(format!("failed to parse schema: {err}").into()))?; 210 | this.validate()?; 211 | Ok(this) 212 | } 213 | 214 | /// Serialize the schema to on-disk bytes; this is the reverse of [`Schema::parse`]. 215 | /// 216 | /// The serialization format is not canonical and the result may change 217 | /// between versions of this library. It is not considered a breaking 218 | /// change but a minor change. 219 | /// 220 | /// # Properties 221 | /// 222 | /// - If `schema1 == schema2`, then `schema1.to_bytes()? == schema2.to_bytes()?` 223 | /// 224 | /// - `Schema::parse(schema.to_bytes()?)? == schema` 225 | /// 226 | /// - `Schema::parse(bytes)?.to_bytes() == bytes` may *NOT* hold. 227 | /// 228 | /// # Errors 229 | /// 230 | /// Returns `Err` if serialization fails. Currently this can happen on 231 | /// overly large collections whose length exceeds `i32::MAX`.
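/// /// # Example /// /// A round-trip sketch of the properties above, where `schema_bytes` is assumed to be a decompressed `METADATA_V2_SCHEMA` payload: /// /// ```ignore /// let schema = Schema::parse(&schema_bytes)?; /// let bytes = schema.to_bytes()?; /// assert_eq!(Schema::parse(&bytes)?, schema); /// ```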
232 | #[cfg(feature = "serialize")] 233 | pub fn to_bytes(&self) -> Result<Vec<u8>> { 234 | ser_thrift::serialize_struct(self) 235 | .map_err(|err| Error(format!("failed to serialize schema: {err}").into())) 236 | } 237 | 238 | fn validate(&self) -> Result<()> { 239 | self.validate_inner() 240 | .map_err(|msg| Error(msg.into_boxed_str())) 241 | } 242 | 243 | fn validate_inner(&self) -> Result<(), String> { 244 | const FILE_VERSION: i32 = 1; 245 | 246 | if self.file_version != FILE_VERSION { 247 | bail!(format!( 248 | "unsupported schema file_version {:?}", 249 | self.file_version 250 | )); 251 | } 252 | if self.layouts.get(self.root_layout).is_none() { 253 | bail!("missing root_layout"); 254 | } 255 | 256 | for (layout_id, layout) in self.layouts.iter() { 257 | if layout.fields.is_empty() && layout.bits > 64 { 258 | bail!(format!( 259 | "layout {}: primitive type is too large to have {} bits", 260 | layout_id, layout.bits, 261 | )); 262 | } 263 | 264 | for (field_id, field) in layout.fields.iter() { 265 | (|| -> Result<(), &str> { 266 | let field_layout = self 267 | .layouts 268 | .get(field.layout_id) 269 | .ok_or("layout index out of range")?; 270 | let bit_offset = if field.offset >= 0 { 271 | field.offset.checked_mul(8) 272 | } else { 273 | field.offset.checked_neg() 274 | }; 275 | if field_layout.bits < 0 { 276 | bail!("layout bits cannot be negative"); 277 | } 278 | let bit_total_size = bit_offset 279 | .and_then(|off| (off as u16).checked_add(field_layout.bits as u16)); 280 | bit_total_size.ok_or("offset overflows")?; 281 | Ok(()) 282 | })() 283 | .map_err(|err| format!("field {field_id} of layout {layout_id}: {err}"))?; 284 | } 285 | } 286 | 287 | Ok(()) 288 | } 289 | } 290 | 291 | /// A wrapper of a `Vec<T>` representing an ordered set of ascending `T`. 292 | #[derive(Default, Clone, PartialEq, Deserialize, Serialize)] 293 | #[serde(transparent)] 294 | pub struct OrderedSet<T>(pub Vec<T>); 295 | 296 | impl<T: fmt::Debug> fmt::Debug for OrderedSet<T> { 297 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 298 | f.debug_set().entries(&self.0).finish() 299 | } 300 | } 301 | 302 | impl<T> OrderedSet<T> { 303 | /// Returns the number of elements in the underlying `Vec`. 304 | #[must_use] 305 | #[inline] 306 | pub fn len(&self) -> usize { 307 | self.0.len() 308 | } 309 | 310 | /// Returns true if the underlying `Vec` contains no elements. 311 | #[must_use] 312 | #[inline] 313 | pub fn is_empty(&self) -> bool { 314 | self.0.is_empty() 315 | } 316 | 317 | /// Returns true if the underlying `Vec` contains `value`. 318 | /// 319 | /// This uses binary search and the underlying `Vec` must be sorted by 320 | /// ascending `T`; otherwise, it will return an unspecified result but will 321 | /// not panic. 322 | #[must_use] 323 | pub fn contains<Q>(&self, value: &Q) -> bool 324 | where 325 | T: Borrow<Q> + Ord, 326 | Q: Ord + ?Sized, 327 | { 328 | self.0 329 | .binary_search_by(|probe| Ord::cmp(probe.borrow(), value)) 330 | .is_ok() 331 | } 332 | } 333 | 334 | /// A wrapper of a `Vec<(K, V)>` representing an ordered map of ascending key `K`.
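/// /// For illustration, [`OrderedMap::get`] binary-searches the sorted pairs: /// /// ``` /// use dwarfs::metadata::OrderedMap; /// /// let map = OrderedMap(vec![(1u32, "a"), (3, "b"), (7, "c")]); /// assert_eq!(map.get(&3), Some(&"b")); /// assert_eq!(map.get(&4), None); /// ```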
335 | #[derive(Default, Clone, PartialEq)] 336 | pub struct OrderedMap<K, V>(pub Vec<(K, V)>); 337 | 338 | impl<K: fmt::Debug, V: fmt::Debug> fmt::Debug for OrderedMap<K, V> { 339 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 340 | f.debug_map() 341 | .entries(self.0.iter().map(|(k, v)| (k, v))) 342 | .finish() 343 | } 344 | } 345 | 346 | impl<'de, K: Deserialize<'de>, V: Deserialize<'de>> Deserialize<'de> for OrderedMap<K, V> { 347 | fn deserialize<D>(de: D) -> std::result::Result<Self, D::Error> 348 | where 349 | D: de::Deserializer<'de>, 350 | { 351 | struct Visitor<K, V>(PhantomData<(K, V)>); 352 | 353 | impl<'de, K: Deserialize<'de>, V: Deserialize<'de>> de::Visitor<'de> for Visitor<K, V> { 354 | type Value = OrderedMap<K, V>; 355 | 356 | fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { 357 | f.write_str("a map") 358 | } 359 | 360 | fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> 361 | where 362 | A: de::MapAccess<'de>, 363 | { 364 | let mut v = Vec::with_capacity(map.size_hint().unwrap_or(0)); 365 | while let Some(pair) = map.next_entry()? { 366 | v.push(pair); 367 | } 368 | Ok(OrderedMap(v)) 369 | } 370 | } 371 | 372 | de.deserialize_map(Visitor::<K, V>(PhantomData)) 373 | } 374 | } 375 | 376 | impl<K: Serialize, V: Serialize> Serialize for OrderedMap<K, V> { 377 | fn serialize<S>(&self, ser: S) -> Result<S::Ok, S::Error> 378 | where 379 | S: serde::Serializer, 380 | { 381 | ser.collect_map(self.0.iter().map(|(k, v)| (k, v))) 382 | } 383 | } 384 | 385 | impl<K, V> OrderedMap<K, V> { 386 | /// Returns the number of elements in the underlying `Vec`. 387 | #[must_use] 388 | #[inline] 389 | pub fn len(&self) -> usize { 390 | self.0.len() 391 | } 392 | 393 | /// Returns true if the underlying `Vec` contains no elements. 394 | #[must_use] 395 | #[inline] 396 | pub fn is_empty(&self) -> bool { 397 | self.0.is_empty() 398 | } 399 | 400 | /// Search and get the `value` corresponding to `key` in the map. 401 | /// 402 | /// This uses binary search and the underlying `Vec` must be sorted by 403 | /// ascending `K`; otherwise, it will return an unspecified result but will 404 | /// not panic. 405 | #[must_use] 406 | pub fn get<Q>(&self, key: &Q) -> Option<&V> 407 | where 408 | K: Borrow<Q> + Ord, 409 | Q: Ord + ?Sized, 410 | { 411 | let i = self 412 | .0 413 | .binary_search_by(|(probe, _)| Ord::cmp(probe.borrow(), key)) 414 | .ok()?; 415 | Some(&self.0[i].1) 416 | } 417 | } 418 | 419 | impl Metadata { 420 | /// Parse the metadata from on-disk serialized form 421 | /// ([`SectionType::METADATA_V2`](crate::section::SectionType::METADATA_V2)), 422 | /// using layout defined by the given schema. 423 | /// 424 | /// # Errors 425 | /// 426 | /// Returns `Err` if it fails to deserialize. This can happen on invalid input 427 | /// bytes, an invalid `schema`, length overflows, etc. 428 | /// 429 | /// Since only structures but not values are checked, this method may 430 | /// optimistically accept some "semantically invalid" `Metadata`. 431 | /// The tolerance on invalid parts may change in the future. 432 | pub fn parse(schema: &Schema, bytes: &[u8]) -> Result<Self> { 433 | de_frozen::deserialize(schema, bytes) 434 | .map_err(|err| Error(format!("failed to parse metadata: {err}").into())) 435 | } 436 | 437 | /// Serialize the metadata to on-disk bytes; this is the reverse of [`Metadata::parse`]. 438 | /// 439 | /// The serialization format is not canonical and the result may change 440 | /// between versions of this library. It is not considered a breaking 441 | /// change but a minor change. 442 | /// 443 | /// # Properties 444 | /// 445 | /// - If `meta1 == meta2`, then `meta1.to_schema_and_bytes()? 

418 | 
419 | impl Metadata {
420 |     /// Parse the metadata from on-disk serialized form
421 |     /// ([`SectionType::METADATA_V2`](crate::section::SectionType::METADATA_V2)),
422 |     /// using the layout defined by the given schema.
423 |     ///
424 |     /// # Errors
425 |     ///
426 |     /// Returns `Err` if deserialization fails. This can happen on invalid
427 |     /// input bytes, an invalid `schema`, length overflows, etc.
428 |     ///
429 |     /// Since only structures but not values are checked, this method may
430 |     /// optimistically accept some "semantically invalid" `Metadata`.
431 |     /// The tolerance on invalid parts may change in the future.
432 |     pub fn parse(schema: &Schema, bytes: &[u8]) -> Result<Self> {
433 |         de_frozen::deserialize(schema, bytes)
434 |             .map_err(|err| Error(format!("failed to parse metadata: {err}").into()))
435 |     }
436 | 
437 |     /// Serialize the metadata to on-disk bytes; does the reverse of [`Metadata::parse`].
438 |     ///
439 |     /// The serialization format is not canonical and the result may change
440 |     /// between versions of this library. Such a change is not considered a
441 |     /// breaking change but a minor one.
442 |     ///
443 |     /// # Properties
444 |     ///
445 |     /// - If `meta1 == meta2`, then `meta1.to_schema_and_bytes()? == meta2.to_schema_and_bytes()?`.
446 |     ///
447 |     /// - `let (schema, bytes) = meta.to_schema_and_bytes()?; meta == Metadata::parse(&schema, &bytes)?`
448 |     ///
449 |     /// # Errors
450 |     ///
451 |     /// Returns `Err` if serialization fails. Currently this can happen on
452 |     /// overly large collections whose length exceeds `i32::MAX`.
453 |     #[cfg(feature = "serialize")]
454 |     pub fn to_schema_and_bytes(&self) -> Result<(Schema, Vec<u8>)> {
455 |         ser_frozen::serialize_struct(self)
456 |             .map_err(|err| Error(format!("failed to serialize metadata: {err}").into()))
457 |     }
458 | }
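
// --- Added example (not part of the upstream sources): metadata round-trip ---
// A sketch of the round-trip property documented on `to_schema_and_bytes`,
// gated on the `serialize` feature. `Metadata::default()` is an assumption
// standing in for a real parsed metadata value.
#[cfg(all(test, feature = "serialize"))]
mod metadata_roundtrip_example {
    use super::Metadata;

    #[test]
    fn roundtrip() {
        let meta = Metadata::default();
        let (schema, bytes) = meta.to_schema_and_bytes().unwrap();
        assert_eq!(meta, Metadata::parse(&schema, &bytes).unwrap());
    }
}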

459 | 
460 | /// See [module level documentation][self] for details.
461 | #[expect(missing_docs, reason = "users should check upstream docs")]
462 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
463 | #[non_exhaustive]
464 | #[serde(default)]
465 | pub struct Metadata {
466 |     // NB. Field order matters for ser/de impl.
467 |     // #1
468 |     pub chunks: Vec<Chunk>,
469 |     pub directories: Vec<Directory>,
470 |     pub inodes: Vec<InodeData>,
471 |     pub chunk_table: Vec<u32>,
472 |     #[deprecated = "deprecated since DwarFS 2.3"]
473 |     pub entry_table: Vec<u32>,
474 |     pub symlink_table: Vec<u32>,
475 |     pub uids: Vec<u32>,
476 |     pub gids: Vec<u32>,
477 |     pub modes: Vec<u32>,
478 |     pub names: Vec<BString>,
479 |     pub symlinks: Vec<BString>,
480 |     pub timestamp_base: u64,
481 | 
482 |     // #13
483 |     pub chunk_inode_offset: u32,
484 |     pub link_inode_offset: u32,
485 | 
486 |     // #15
487 |     pub block_size: u32,
488 |     pub total_fs_size: u64,
489 | 
490 |     // #17
491 |     pub devices: Option<Vec<u64>>,
492 |     pub options: Option<FsOptions>,
493 | 
494 |     // #19
495 |     pub dir_entries: Option<Vec<DirEntry>>,
496 |     pub shared_files_table: Option<Vec<u32>>,
497 |     pub total_hardlink_size: Option<u64>,
498 |     pub dwarfs_version: Option<BString>,
499 |     pub create_timestamp: Option<u64>,
500 |     pub compact_names: Option<StringTable>,
501 |     pub compact_symlinks: Option<StringTable>,
502 | 
503 |     // #26
504 |     pub preferred_path_separator: Option<u32>,
505 |     pub features: Option<OrderedSet<BString>>,
506 |     pub category_names: Option<Vec<BString>>,
507 |     pub block_categories: Option<Vec<u32>>,
508 |     pub reg_file_size_cache: Option<InodeSizeCache>,
509 | }
510 | 
511 | /// See [module level documentation][self] for details.
512 | #[expect(missing_docs, reason = "users should check upstream docs")]
513 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
514 | #[non_exhaustive]
515 | #[serde(default)]
516 | pub struct Chunk {
517 |     // NB. Field order matters for ser/de impl.
518 |     pub block: u32,
519 |     pub offset: u32,
520 |     pub size: u32,
521 | }
522 | 
523 | /// See [module level documentation][self] for details.
524 | #[expect(missing_docs, reason = "users should check upstream docs")]
525 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
526 | #[non_exhaustive]
527 | #[serde(default)]
528 | pub struct Directory {
529 |     // NB. Field order matters for ser/de impl.
530 |     pub parent_entry: u32,
531 |     pub first_entry: u32,
532 |     pub self_entry: u32,
533 | }
534 | 
535 | /// See [module level documentation][self] for details.
536 | #[expect(missing_docs, reason = "users should check upstream docs")]
537 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
538 | #[non_exhaustive]
539 | #[serde(default)]
540 | pub struct InodeData {
541 |     // NB. Field order matters for ser/de impl.
542 |     #[deprecated = "deprecated since DwarFS 2.3"]
543 |     pub name_index: u32,
544 |     pub mode_index: u32,
545 |     #[deprecated = "deprecated since DwarFS 2.3"]
546 |     pub inode: u32,
547 |     pub owner_index: u32,
548 |     pub group_index: u32,
549 |     pub atime_offset: u32,
550 |     pub mtime_offset: u32,
551 |     pub ctime_offset: u32,
552 | }
553 | 
554 | /// See [module level documentation][self] for details.
555 | #[expect(missing_docs, reason = "users should check upstream docs")]
556 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
557 | #[non_exhaustive]
558 | #[serde(default)]
559 | pub struct DirEntry {
560 |     // NB. Field order matters for ser/de impl.
561 |     pub name_index: u32,
562 |     pub inode_num: u32,
563 | }
564 | 
565 | /// See [module level documentation][self] for details.
566 | #[expect(missing_docs, reason = "users should check upstream docs")]
567 | #[expect(clippy::struct_excessive_bools, reason = "follows upstream")]
568 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
569 | #[non_exhaustive]
570 | #[serde(default)]
571 | pub struct FsOptions {
572 |     // NB. Field order matters for ser/de impl.
573 |     pub mtime_only: bool,
574 |     pub time_resolution_sec: Option<u32>,
575 |     pub packed_chunk_table: bool,
576 |     pub packed_directories: bool,
577 |     pub packed_shared_files_table: bool,
578 | }
579 | 
580 | /// See [module level documentation][self] for details.
581 | #[expect(missing_docs, reason = "users should check upstream docs")]
582 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
583 | #[non_exhaustive]
584 | #[serde(default)]
585 | pub struct StringTable {
586 |     // NB. Field order matters for ser/de impl.
587 |     pub buffer: BString,
588 |     pub symtab: Option<BString>,
589 |     pub index: Vec<u32>,
590 |     pub packed_index: bool,
591 | }
592 | 
593 | /// See [module level documentation][self] for details.
594 | #[expect(missing_docs, reason = "users should check upstream docs")]
595 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
596 | #[non_exhaustive]
597 | #[serde(default)]
598 | pub struct InodeSizeCache {
599 |     // NB. Field order matters for ser/de impl.
600 |     pub lookup: OrderedMap<u32, u64>,
601 |     pub min_chunk_count: u64,
602 | }
603 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/de_frozen.rs:
--------------------------------------------------------------------------------
1 | //! fbthrift's Frozen2 format, a bit-compressed compact format that has
2 | //! nothing to do with Thrift.
3 | //!
4 | //! Here we use serde for structure deserialization. The metadata is expected to
5 | //! be fully unpacked in memory for performance, thus it makes little sense to
6 | //! parse it lazily, which would only invite more trouble from invalid unread data.
7 | //!
8 | //! Source:
9 | //!
10 | //! There is almost no documentation about this format. The details are mostly from:
11 | //! - Help from Marcus Holland-Moritz, who wrote some explanations
12 | //!   and examples:
13 | //!
14 | //!
15 | //! - Me (oxalica) reverse-engineering byte layouts in the DwarFS metadata block, and
16 | //!   comparing with the metadata dump from:
17 | //!   `dwarfsck $imgfile -d metadata_full_dump`
18 | 
19 | use serde::{de, forward_to_deserialize_any};
20 | 
21 | use super::{Schema, SchemaLayout};
22 | 
23 | type Result<T, E = Error> = std::result::Result<T, E>;
24 | type Error = de::value::Error;
25 | 
26 | /// The offset type we use to index into metadata bytes.
27 | ///
28 | /// We expect metadata to be relatively small compared to the actual data, and
29 | /// it's efficiently bit-packed. Assume 4GiB is enough for it.
30 | pub(crate) type Offset = u32;
31 | 
32 | // Assert that offset -> usize never overflows.
33 | fn to_usize(offset: Offset) -> usize {
34 |     const _: () = assert!(size_of::<Offset>() <= size_of::<usize>());
35 |     offset as usize
36 | }
37 | 
38 | pub(crate) fn deserialize<'de, T: de::Deserialize<'de>>(schema: &'de Schema, bytes: &'de [u8]) -> Result<T> {
39 |     let root_layout = schema.layouts.get(schema.root_layout).expect("validated");
40 |     let de = Deserializer {
41 |         src: &Source { schema, bytes },
42 |         layout: Some(root_layout),
43 |         bit_offset: 0,
44 |         storage_start: 0,
45 |     };
46 |     T::deserialize(de)
47 | }
48 | 
49 | /// The input raw bytes with attached schema.
50 | #[derive(Clone, Copy)]
51 | struct Source<'a> {
52 |     schema: &'a Schema,
53 |     bytes: &'a [u8],
54 | }
55 | 
56 | impl Source<'_> {
57 |     /// Load 1 bit at `base_bit`, using little-endian order.
58 |     ///
59 |     /// This assumes the input is in bounds. Validation should be done on structs.
60 |     fn load_bit(&self, base_bit: Offset) -> Result<bool> {
61 |         let (byte_idx, bit_idx) = (to_usize(base_bit) / 8, base_bit % 8);
62 |         let b = *self
63 |             .bytes
64 |             .get(byte_idx)
65 |             .ok_or_else(|| de::Error::custom("bit location overflow"))?;
66 |         Ok((b >> bit_idx) & 1 != 0)
67 |     }
68 | 
69 |     /// Load `bits` bits starting at `base_bit`, using little-endian order,
70 |     /// filling upper bits with 0.
71 |     ///
72 |     /// This assumes the input is in bounds. Validation should be done on structs.
73 |     fn load_bits(&self, base_bit: Offset, bits: u16) -> Result<u64> {
74 |         // Already checked by schema validation.
75 |         debug_assert!(bits > 0);
76 |         debug_assert!(bits <= 64);
77 |         let (byte_idx, bit_start) = (to_usize(base_bit) / 8, base_bit as u16 % 8);
78 |         let last_byte_idx = (base_bit + Offset::from(bits) - 1) / 8;
79 |         if to_usize(last_byte_idx) >= self.bytes.len() {
80 |             return Err(de::Error::custom("bits location overflow"));
81 |         }
82 | 
83 |         // Always load an 8-byte chunk for performance.
84 |         let rest = &self.bytes[byte_idx..];
85 |         let x = if rest.len() >= 8 {
86 |             u64::from_le_bytes(rest[..8].try_into().unwrap())
87 |         } else {
88 |             let mut buf = [0u8; 8];
89 |             buf[..rest.len()].copy_from_slice(rest);
90 |             u64::from_le_bytes(buf)
91 |         };
92 | 
93 |         let start_and_bits = bit_start + bits;
94 |         Ok(if start_and_bits <= 64 {
95 |             // Simple case:
96 |             // Bit | 63, 62, ...          1, 0 |
97 |             //     | up_bits | bits | bit_start |
98 |             //                ~~~~~~ target
99 |             x << (64 - start_and_bits) >> (64 - bits)
100 |         } else {
101 |             // Overshooting case:
102 |             // Bit | 71 .. 64 | 63, 62, ...  1, 0 |
103 |             //     |     | bits    | bit_start    |
104 |             //       ~~~~~~~~~~~~~~~ target
105 | 
106 |             // We need the 9-th (idx=8) byte. This can only happen if bits >= 56.
107 |             let overshooting_bits = start_and_bits & 63;
108 |             let hi = u64::from(rest[8]);
109 |             x >> bit_start | hi << (64 - overshooting_bits) >> (64 - bits)
110 |         })
111 |     }
112 | }
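
// --- Added example (not part of the upstream sources): `load_bits` arithmetic ---
// A small worked check of the little-endian bit extraction above. With bytes
// [0b1011_0100, 0b0000_0001], reading 4 bits at bit 2 takes bits 2..6 of the
// first byte: (0xB4 >> 2) & 0xF = 0b1101 = 13. The `Schema` literal is an
// assumption; its field names follow the in-crate tests.
#[cfg(test)]
mod load_bits_example {
    use super::*;
    use super::super::DenseMap;

    #[test]
    fn extracts_le_bit_ranges() {
        // `load_bits` never looks at the schema, so any schema will do here.
        let schema = Schema {
            relax_type_checks: true,
            layouts: DenseMap::default(),
            root_layout: 0,
            file_version: 1,
        };
        let src = Source {
            schema: &schema,
            bytes: &[0b1011_0100, 0b0000_0001],
        };
        assert_eq!(src.load_bits(2, 4).unwrap(), 0b1101);
        // A range crossing the byte boundary: bits 6..10 are the top two bits
        // of byte 0 plus the low two bits of byte 1 = 0b0110.
        assert_eq!(src.load_bits(6, 4).unwrap(), 0b0110);
    }
}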

113 | 
114 | #[derive(Clone, Copy)]
115 | struct Deserializer<'a, 'de> {
116 |     src: &'a Source<'de>,
117 |     layout: Option<&'de SchemaLayout>,
118 |     bit_offset: Offset,
119 |     storage_start: Offset,
120 | }
121 | 
122 | impl<'de> Deserializer<'_, 'de> {
123 |     fn field_deserializer(&self, i: i16) -> Self {
124 |         let (layout, offset_bits) = if let Some(field) = self.layout.and_then(|l| l.fields.get(i)) {
125 |             (
126 |                 self.src.schema.layouts.get(field.layout_id),
127 |                 field.offset_bits(),
128 |             )
129 |         } else {
130 |             (None, 0)
131 |         };
132 |         Self {
133 |             src: self.src,
134 |             layout,
135 |             bit_offset: self.bit_offset + Offset::from(offset_bits),
136 |             storage_start: self.storage_start,
137 |         }
138 |     }
139 | 
140 |     fn deserialize_field<T: de::Deserialize<'de>>(&self, i: i16) -> Result<T> {
141 |         de::Deserialize::deserialize(self.field_deserializer(i))
142 |     }
143 | }
144 | 
145 | impl<'de> de::Deserializer<'de> for Deserializer<'_, 'de> {
146 |     type Error = Error;
147 | 
148 |     fn is_human_readable(&self) -> bool {
149 |         false
150 |     }
151 | 
152 |     fn deserialize_any<V>(self, _visitor: V) -> Result<V::Value>
153 |     where
154 |         V: de::Visitor<'de>,
155 |     {
156 |         // Not used.
157 |         unimplemented!()
158 |     }
159 | 
160 |     fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
161 |     where
162 |         V: de::Visitor<'de>,
163 |     {
164 |         let b = self.layout.is_some()
165 |             && self
166 |                 .src
167 |                 .load_bit(self.storage_start * 8 + self.bit_offset)?;
168 |         visitor.visit_bool(b)
169 |     }
170 | 
171 |     fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
172 |     where
173 |         V: de::Visitor<'de>,
174 |     {
175 |         self.deserialize_u64(visitor)
176 |     }
177 | 
178 |     fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
179 |     where
180 |         V: de::Visitor<'de>,
181 |     {
182 |         let Some(layout) = self.layout else {
183 |             return visitor.visit_u64(0);
184 |         };
185 |         if !layout.fields.is_empty() {
186 |             return Err(de::Error::invalid_type(
187 |                 de::Unexpected::Other("a schema layout with some fields"),
188 |                 &"an unsigned integer",
189 |             ));
190 |         }
191 |         let bits = layout.bits;
192 |         if !(0..=64).contains(&bits) {
193 |             return Err(de::Error::custom("too many bits for an unsigned int"));
194 |         }
195 |         visitor.visit_u64(
196 |             self.src
197 |                 .load_bits(self.storage_start * 8 + self.bit_offset, bits as u16)?,
198 |         )
199 |     }
200 | 
201 |     fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
202 |     where
203 |         V: de::Visitor<'de>,
204 |     {
205 |         self.deserialize_bytes(visitor)
206 |     }
207 | 
208 |     fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
209 |     where
210 |         V: de::Visitor<'de>,
211 |     {
212 |         let distance = self.deserialize_field::<Offset>(1)?;
213 |         let len = self.deserialize_field::<Offset>(2)?;
214 | 
215 |         let content = (|| {
216 |             let start = self.storage_start.checked_add(distance)?;
217 |             let end = start.checked_add(len)?;
218 |             self.src
219 |                 .bytes
220 |                 .get(usize::try_from(start).ok()?..usize::try_from(end).ok()?)
221 |         })()
222 |         .ok_or_else(|| <Error as de::Error>::custom("string offset or length overflow"))?;
223 |         visitor.visit_borrowed_bytes(content)
224 |     }
225 | 
226 |     fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
227 |     where
228 |         V: de::Visitor<'de>,
229 |     {
230 |         let distance = self.deserialize_field::<Offset>(1)?;
231 |         let len = self.deserialize_field::<u32>(2)?;
232 |         let elem_layout = self.layout.and_then(|l| {
233 |             let id = l.fields.get(3)?.layout_id;
234 |             Some(self.src.schema.layouts.get(id).expect("validated"))
235 |         });
236 |         visitor.visit_seq(CollectionDeserializer {
237 |             elem_de: Self {
238 |                 src: self.src,
239 |                 layout: elem_layout,
240 |                 bit_offset: 0,
241 |                 storage_start: self.storage_start + distance,
242 |             },
243 |             len,
244 |         })
245 |     }
246 | 
247 |     fn deserialize_map<V>(self, visitor: V) -> Result<V::Value>
248 |     where
249 |         V: de::Visitor<'de>,
250 |     {
251 |         let distance = self.deserialize_field::<Offset>(1)?;
252 |         let len = self.deserialize_field::<u32>(2)?;
253 |         let elem_layout = self.layout.and_then(|l| {
254 |             let id = l.fields.get(3)?.layout_id;
255 |             Some(self.src.schema.layouts.get(id).expect("validated"))
256 |         });
257 |         visitor.visit_map(CollectionDeserializer {
258 |             elem_de: Self {
259 |                 src: self.src,
260 |                 layout: elem_layout,
261 |                 bit_offset: 0,
262 |                 storage_start: self.storage_start + distance,
263 |             },
264 |             len,
265 |         })
266 |     }
267 | 
268 |     fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
269 |     where
270 |         V: de::Visitor<'de>,
271 |     {
272 |         if !self.deserialize_field::<bool>(1)? {
273 |             return visitor.visit_none();
274 |         }
275 |         visitor.visit_some(self.field_deserializer(2))
276 |     }
277 | 
278 |     fn deserialize_struct<V>(
279 |         self,
280 |         _name: &'static str,
281 |         _fields: &'static [&'static str],
282 |         visitor: V,
283 |     ) -> Result<V::Value>
284 |     where
285 |         V: de::Visitor<'de>,
286 |     {
287 |         visitor.visit_map(StructDeserializer {
288 |             de: self,
289 |             field_id: 0,
290 |         })
291 |     }
292 | 
293 |     fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
294 |     where
295 |         V: de::Visitor<'de>,
296 |     {
297 |         visitor.visit_unit()
298 |     }
299 | 
300 |     forward_to_deserialize_any! {
301 |         i8 i16 i32 i64 i128 u8 u16 u128 f32 f64 char str string
302 |         unit unit_struct newtype_struct tuple
303 |         tuple_struct enum identifier
304 |     }
305 | }
306 | 
307 | struct StructDeserializer<'i, 'de> {
308 |     de: Deserializer<'i, 'de>,
309 |     field_id: usize,
310 | }
311 | 
312 | impl<'de> de::MapAccess<'de> for StructDeserializer<'_, 'de> {
313 |     type Error = Error;
314 | 
315 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
316 |     where
317 |         K: de::DeserializeSeed<'de>,
318 |     {
319 |         let Some(layout) = self.de.layout else {
320 |             return Ok(None);
321 |         };
322 | 
323 |         let fields = &layout.fields.0;
324 |         while self.field_id < fields.len() {
325 |             if fields[self.field_id].is_some() {
326 |                 // Map 1.. to 0.. for serde.
327 |                 let serde_field_id = self.field_id as u64 - 1;
328 |                 return seed
329 |                     .deserialize(de::value::U64Deserializer::new(serde_field_id))
330 |                     .map(Some);
331 |             }
332 |             self.field_id += 1;
333 |         }
334 |         Ok(None)
335 |     }
336 | 
337 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
338 |     where
339 |         V: de::DeserializeSeed<'de>,
340 |     {
341 |         self.field_id += 1;
342 |         seed.deserialize(self.de.field_deserializer(self.field_id as i16 - 1))
343 |     }
344 | }
345 | 
346 | struct CollectionDeserializer<'a, 'de> {
347 |     elem_de: Deserializer<'a, 'de>,
348 |     len: u32,
349 | }
350 | 
351 | impl<'de> de::SeqAccess<'de> for CollectionDeserializer<'_, 'de> {
352 |     type Error = Error;
353 | 
354 |     fn size_hint(&self) -> Option<usize> {
355 |         self.len.try_into().ok()
356 |     }
357 | 
358 |     fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
359 |     where
360 |         T: de::DeserializeSeed<'de>,
361 |     {
362 |         if self.len == 0 {
363 |             return Ok(None);
364 |         }
365 | 
366 |         let ret = seed.deserialize(self.elem_de);
367 |         self.len -= 1;
368 |         if let Some(layout) = self.elem_de.layout {
369 |             self.elem_de.bit_offset += layout.bits as Offset;
370 |         }
371 |         ret.map(Some)
372 |     }
373 | }
374 | 
375 | impl<'de> de::MapAccess<'de> for CollectionDeserializer<'_, 'de> {
376 |     type Error = Error;
377 | 
378 |     fn size_hint(&self) -> Option<usize> {
379 |         self.len.try_into().ok()
380 |     }
381 | 
382 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
383 |     where
384 |         K: de::DeserializeSeed<'de>,
385 |     {
386 |         if self.len == 0 {
387 |             return Ok(None);
388 |         }
389 |         self.len -= 1;
390 | 
391 |         seed.deserialize(self.elem_de.field_deserializer(1))
392 |             .map(Some)
393 |     }
394 | 
395 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
396 |     where
397 |         V: de::DeserializeSeed<'de>,
398 |     {
399 |         let ret = seed.deserialize(self.elem_de.field_deserializer(2));
400 |         if let Some(layout) = self.elem_de.layout {
401 |             self.elem_de.bit_offset += layout.bits as Offset;
402 |         }
403 |         ret
404 |     }
405 | }
406 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/de_thrift.rs:
--------------------------------------------------------------------------------
1 | //! Dwarven thrift, with fbthrift flavor.
2 | //!
3 | //! This implements just enough features to handle the DwarFS schema type (Frozen 2 schema).
4 | //! It is not, and will never be, standard compliant.
5 | //! Supported types: struct, map, string, bool, i16, i32, u32 (map/string length).
6 | //!
7 | //! Currently it will reject unsupported types and thus is not future-proof. I'm not
8 | //! expecting this to change in the near future; DwarFS, as an on-disk format,
9 | //! should not eagerly update its Frozen dependency.
10 | //!
11 | //! Frozen 2 schema:
12 | //!
13 | //! Thrift-compact spec:
14 | //! NB. fbthrift has different handling of varints, which seems to be incompatible with Apache Thrift.
15 | use serde::{de, forward_to_deserialize_any};
16 | 
17 | type Result<T, E = Error> = std::result::Result<T, E>;
18 | type Error = de::value::Error;
19 | 
20 | pub(crate) fn deserialize_struct<'de, T: de::Deserialize<'de>>(input: &'de [u8]) -> Result<T> {
21 |     let mut de = ValueDeserializer {
22 |         rest: input,
23 |         typ: Tag::Struct,
24 |     };
25 |     let v = T::deserialize(&mut de)?;
26 |     if !de.rest.is_empty() {
27 |         return Err(de::Error::custom(format_args!(
28 |             "unexpected trailing bytes at {}",
29 |             input.len() - de.rest.len(),
30 |         )));
31 |     }
32 |     Ok(v)
33 | }
34 | 
35 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
36 | #[repr(u8)]
37 | pub(crate) enum Tag {
38 |     BoolTrue = 1,
39 |     BoolFalse = 2,
40 |     I16 = 4,
41 |     I32 = 5,
42 |     Binary = 8,
43 |     Map = 11,
44 |     Struct = 12,
45 | 
46 |     // Pseudo tags.
47 |     UnknownBool = 0,
48 |     Invalid = 15,
49 | }
50 | 
51 | impl Tag {
52 |     fn without_inline_bool(self) -> Self {
53 |         if let Self::BoolTrue | Self::BoolFalse = self {
54 |             Self::UnknownBool
55 |         } else {
56 |             self
57 |         }
58 |     }
59 | }
60 | 
61 | impl TryFrom<u8> for Tag {
62 |     type Error = Error;
63 | 
64 |     fn try_from(typ: u8) -> Result<Self> {
65 |         Ok(match typ {
66 |             1 => Tag::BoolTrue,
67 |             2 => Tag::BoolFalse,
68 |             // 3: i8
69 |             4 => Tag::I16,
70 |             5 => Tag::I32,
71 |             // 6: i64
72 |             // 7: double
73 |             8 => Tag::Binary,
74 |             // 9: list
75 |             // 10: set
76 |             11 => Tag::Map,
77 |             12 => Tag::Struct,
78 |             // 13: float
79 |             _ => {
80 |                 return Err(de::Error::custom(format_args!(
81 |                     "invalid or unsupported type tag: {typ:#x}"
82 |                 )));
83 |             }
84 |         })
85 |     }
86 | }
87 | 
88 | struct ValueDeserializer<'de> {
89 |     rest: &'de [u8],
90 |     typ: Tag,
91 | }
92 | 
93 | impl<'de> ValueDeserializer<'de> {
94 |     fn eat_byte(&mut self) -> Result<u8> {
95 |         let (&fst, rest) = self
96 |             .rest
97 |             .split_first()
98 |             .ok_or_else(|| de::Error::custom("unexpected EOF"))?;
99 |         self.rest = rest;
100 |         Ok(fst)
101 |     }
102 | 
103 |     fn eat_varint(&mut self) -> Result<u32> {
104 |         let mut x = 0u32;
105 |         for i in 0..5 {
106 |             let b = self.eat_byte()?;
107 |             x += u32::from(b & 0x7F) << (i * 7);
108 |             if b & 0x80 == 0 {
109 |                 return Ok(x);
110 |             }
111 |         }
112 |         Err(de::Error::custom("encoded varint is too long"))
113 |     }
114 | 
115 |     fn eat_zigzag(&mut self) -> Result<i32> {
116 |         let x = self.eat_varint()?;
117 |         Ok((x >> 1) as i32 ^ -(x as i32 & 1))
118 |     }
119 | }
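
// --- Added example (not part of the upstream sources): zigzag decoding ---
// A quick check of `eat_zigzag` above: zigzag maps 0, -1, 1, -2, 2, ... to
// 0, 1, 2, 3, 4, ..., so the varint bytes below decode as shown.
#[cfg(test)]
mod zigzag_example {
    use super::{Tag, ValueDeserializer};

    fn decode(bytes: &[u8]) -> i32 {
        let mut de = ValueDeserializer { rest: bytes, typ: Tag::I32 };
        de.eat_zigzag().unwrap()
    }

    #[test]
    fn decodes_zigzag_varints() {
        assert_eq!(decode(&[0x00]), 0);
        assert_eq!(decode(&[0x01]), -1);
        assert_eq!(decode(&[0x02]), 1);
        // Multi-byte varint: 0xAC 0x02 is 300, which unzigzags to 150.
        assert_eq!(decode(&[0xAC, 0x02]), 150);
    }
}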

120 | 
121 | impl<'de> de::Deserializer<'de> for &mut ValueDeserializer<'de> {
122 |     type Error = Error;
123 | 
124 |     fn is_human_readable(&self) -> bool {
125 |         false
126 |     }
127 | 
128 |     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
129 |     where
130 |         V: de::Visitor<'de>,
131 |     {
132 |         match self.typ {
133 |             Tag::UnknownBool => visitor.visit_bool(match self.eat_byte()? {
134 |                 1 => true,
135 |                 2 => false,
136 |                 x => {
137 |                     return Err(de::Error::custom(format_args!(
138 |                         "invalid value for bool: {x:#x}"
139 |                     )));
140 |                 }
141 |             }),
142 |             Tag::BoolTrue => visitor.visit_bool(true),
143 |             Tag::BoolFalse => visitor.visit_bool(false),
144 |             Tag::I16 | Tag::I32 => visitor.visit_i32(self.eat_zigzag()?),
145 |             Tag::Binary => {
146 |                 let len = self.eat_varint()?;
147 |                 // If it overflows, it will fail on the next slicing anyway.
148 |                 let len = usize::try_from(len).unwrap_or(usize::MAX);
149 |                 let (data, rest) = self
150 |                     .rest
151 |                     .split_at_checked(len)
152 |                     .ok_or_else(|| de::Error::custom("input data is too short"))?;
153 |                 self.rest = rest;
154 |                 visitor.visit_borrowed_bytes(data)
155 |             }
156 |             Tag::Map => {
157 |                 let len = self.eat_varint()?;
158 |                 let (ktype, vtype) = if len == 0 {
159 |                     (Tag::Invalid, Tag::Invalid)
160 |                 } else {
161 |                     let typ = self.eat_byte()?;
162 |                     let ktype = Tag::try_from(typ >> 4)?.without_inline_bool();
163 |                     let vtype = Tag::try_from(typ & 0xF)?.without_inline_bool();
164 |                     (ktype, vtype)
165 |                 };
166 |                 visitor.visit_map(MapDeserializer {
167 |                     de: self,
168 |                     len,
169 |                     ktype,
170 |                     vtype,
171 |                 })
172 |             }
173 |             Tag::Struct => visitor.visit_map(StructDeserializer {
174 |                 de: self,
175 |                 field_id: 0,
176 |                 value_type: Tag::Invalid,
177 |             }),
178 | 
179 |             Tag::Invalid => unreachable!(),
180 |         }
181 |     }
182 | 
183 |     forward_to_deserialize_any! {
184 |         bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
185 |         bytes byte_buf option unit unit_struct newtype_struct seq tuple
186 |         tuple_struct map struct enum identifier ignored_any
187 |     }
188 | }
189 | 
190 | struct StructDeserializer<'a, 'de> {
191 |     de: &'a mut ValueDeserializer<'de>,
192 |     field_id: i16,
193 |     value_type: Tag,
194 | }
195 | 
196 | impl<'de> de::MapAccess<'de> for StructDeserializer<'_, 'de> {
197 |     type Error = Error;
198 | 
199 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
200 |     where
201 |         K: de::DeserializeSeed<'de>,
202 |     {
203 |         let b = self.de.eat_byte()?;
204 |         if b == 0 {
205 |             return Ok(None);
206 |         }
207 | 
208 |         let id_delta = i16::from(b >> 4);
209 |         self.field_id = if id_delta != 0 {
210 |             self.field_id.checked_add(id_delta)
211 |         } else {
212 |             i16::try_from(self.de.eat_zigzag()?).ok()
213 |         }
214 |         .ok_or_else(|| de::Error::custom("field id overflow"))?;
215 | 
216 |         self.value_type = Tag::try_from(b & 0xF)?;
217 | 
218 |         // Map range 1.. to 0.. for serde.
219 |         let field_id = (self.field_id - 1) as u64;
220 |         seed.deserialize(de::value::U64Deserializer::new(field_id))
221 |             .map(Some)
222 |     }
223 | 
224 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
225 |     where
226 |         V: de::DeserializeSeed<'de>,
227 |     {
228 |         let prev_typ = std::mem::replace(&mut self.de.typ, self.value_type);
229 |         let v = seed.deserialize(&mut *self.de);
230 |         self.de.typ = prev_typ;
231 |         v
232 |     }
233 | }
234 | 
235 | struct MapDeserializer<'a, 'de> {
236 |     de: &'a mut ValueDeserializer<'de>,
237 |     len: u32,
238 |     ktype: Tag,
239 |     vtype: Tag,
240 | }
241 | 
242 | impl<'de> de::MapAccess<'de> for MapDeserializer<'_, 'de> {
243 |     type Error = Error;
244 | 
245 |     fn size_hint(&self) -> Option<usize> {
246 |         usize::try_from(self.len).ok()
247 |     }
248 | 
249 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
250 |     where
251 |         K: de::DeserializeSeed<'de>,
252 |     {
253 |         if self.len == 0 {
254 |             return Ok(None);
255 |         }
256 |         self.len -= 1;
257 | 
258 |         let prev_typ = std::mem::replace(&mut self.de.typ, self.ktype);
259 |         let k = seed.deserialize(&mut *self.de);
260 |         self.de.typ = prev_typ;
261 |         k.map(Some)
262 |     }
263 | 
264 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
265 |     where
266 |         V: de::DeserializeSeed<'de>,
267 |     {
268 |         let prev_typ = std::mem::replace(&mut self.de.typ, self.vtype);
269 |         let v = seed.deserialize(&mut *self.de);
270 |         self.de.typ = prev_typ;
271 |         v
272 |     }
273 | }
274 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/ser_thrift.rs:
--------------------------------------------------------------------------------
1 | //! The minimal serialization support for minithrift,
2 | //! specialized for the DwarFS schema type only.
3 | //!
4 | //! See [`super::de_thrift`] for more details.
5 | use serde::{de, ser};
6 | 
7 | use super::de_thrift::Tag;
8 | 
9 | type Result<T, E = Error> = std::result::Result<T, E>;
10 | type Error = de::value::Error;
11 | 
12 | pub(crate) fn serialize_struct<T: ser::Serialize>(input: &T) -> Result<Vec<u8>> {
13 |     // TODO: Set a good default capacity here.
14 |     let mut out = Vec::new();
15 |     input.serialize(ValueSerializer {
16 |         w: &mut out,
17 |         inline_bool: false,
18 |     })?;
19 |     Ok(out)
20 | }
21 | 
22 | pub(crate) struct ValueSerializer<'w> {
23 |     w: &'w mut Vec<u8>,
24 |     inline_bool: bool,
25 | }
26 | 
27 | impl ValueSerializer<'_> {
28 |     fn write_varint(&mut self, mut v: u32) {
29 |         loop {
30 |             let more = v >> 7;
31 |             let has_more = more > 0;
32 |             self.w.push((v as u8 & 0x7F) | ((has_more as u8) << 7));
33 |             v = more;
34 |             if !has_more {
35 |                 break;
36 |             }
37 |         }
38 |     }
39 | 
40 |     fn write_zigzag(&mut self, v: i32) {
41 |         self.write_varint((v << 1 ^ (v >> 31)) as u32);
42 |     }
43 | }
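
// --- Added example (not part of the upstream sources): varint encoding ---
// `write_varint` above emits 7 bits per byte, low bits first, setting the
// high bit on every byte except the last; this sketch pins down the wire
// bytes for a few values.
#[cfg(test)]
mod varint_example {
    use super::ValueSerializer;

    fn encode(v: u32) -> Vec<u8> {
        let mut out = Vec::new();
        ValueSerializer { w: &mut out, inline_bool: false }.write_varint(v);
        out
    }

    #[test]
    fn encodes_varints() {
        assert_eq!(encode(0), [0x00]);
        assert_eq!(encode(127), [0x7F]);
        // 300 = 0b10_0101100: low 7 bits (0x2C) with the continuation bit
        // set, then 0x02.
        assert_eq!(encode(300), [0xAC, 0x02]);
    }
}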

44 | 
45 | impl<'w> ser::Serializer for ValueSerializer<'w> {
46 |     type Ok = Tag;
47 |     type Error = Error;
48 |     type SerializeSeq = ser::Impossible<Tag, Error>;
49 |     type SerializeTuple = ser::Impossible<Tag, Error>;
50 |     type SerializeTupleStruct = ser::Impossible<Tag, Error>;
51 |     type SerializeTupleVariant = ser::Impossible<Tag, Error>;
52 |     type SerializeMap = MapSerializer<'w>;
53 |     type SerializeStruct = StructSerializer<'w>;
54 |     type SerializeStructVariant = ser::Impossible<Tag, Error>;
55 | 
56 |     fn serialize_bool(self, v: bool) -> Result<Tag> {
57 |         let tag = if v { Tag::BoolTrue } else { Tag::BoolFalse };
58 |         if self.inline_bool {
59 |             Ok(tag)
60 |         } else {
61 |             self.w.push(tag as u8);
62 |             // TODO: Check behavior of fbthrift on this.
63 |             Ok(Tag::BoolTrue)
64 |         }
65 |     }
66 | 
67 |     fn serialize_i16(mut self, v: i16) -> Result<Tag> {
68 |         self.write_zigzag(v.into());
69 |         Ok(Tag::I16)
70 |     }
71 | 
72 |     fn serialize_i32(mut self, v: i32) -> Result<Tag> {
73 |         self.write_zigzag(v);
74 |         Ok(Tag::I32)
75 |     }
76 | 
77 |     fn serialize_str(mut self, s: &str) -> Result<Tag> {
78 |         let len = u32::try_from(s.len())
79 |             .map_err(|_| ser::Error::custom("string length exceeds u32 range"))?;
80 |         self.write_varint(len);
81 |         self.w.extend_from_slice(s.as_bytes());
82 |         Ok(Tag::Binary)
83 |     }
84 | 
85 |     fn serialize_struct(self, _name: &'static str, _len: usize) -> Result<Self::SerializeStruct> {
86 |         Ok(StructSerializer {
87 |             w: self.w,
88 |             field_id_diff_tag: 0x10,
89 |         })
90 |     }
91 | 
92 |     fn serialize_map(mut self, len: Option<usize>) -> Result<Self::SerializeMap> {
93 |         let len = len
94 |             .and_then(|len| u32::try_from(len).ok())
95 |             .expect("map must have known u32 size");
96 |         self.write_varint(len);
97 |         Ok(MapSerializer {
98 |             type_pos: self.w.len(),
99 |             w: self.w,
100 |             ktype: None,
101 |             vtype: None,
102 |         })
103 |     }
104 | 
105 |     //// Not needed ////
106 | 
107 |     fn serialize_i8(self, _: i8) -> Result<Tag> {
108 |         unimplemented!()
109 |     }
110 | 
111 |     fn serialize_i64(self, _: i64) -> Result<Tag> {
112 |         unimplemented!()
113 |     }
114 | 
115 |     fn serialize_u8(self, _: u8) -> Result<Tag> {
116 |         unimplemented!()
117 |     }
118 | 
119 |     fn serialize_u16(self, _: u16) -> Result<Tag> {
120 |         unimplemented!()
121 |     }
122 | 
123 |     fn serialize_u32(self, _: u32) -> Result<Tag> {
124 |         unimplemented!()
125 |     }
126 | 
127 |     fn serialize_u64(self, _: u64) -> Result<Tag> {
128 |         unimplemented!()
129 |     }
130 | 
131 |     fn serialize_f32(self, _: f32) -> Result<Tag> {
132 |         unimplemented!()
133 |     }
134 | 
135 |     fn serialize_f64(self, _: f64) -> Result<Tag> {
136 |         unimplemented!()
137 |     }
138 | 
139 |     fn serialize_char(self, _: char) -> Result<Tag> {
140 |         unimplemented!()
141 |     }
142 | 
143 |     fn serialize_bytes(self, _: &[u8]) -> Result<Tag> {
144 |         unimplemented!()
145 |     }
146 | 
147 |     fn serialize_none(self) -> Result<Tag> {
148 |         unimplemented!()
149 |     }
150 | 
151 |     fn serialize_some<T>(self, _value: &T) -> Result<Tag>
152 |     where
153 |         T: ?Sized + ser::Serialize,
154 |     {
155 |         unimplemented!()
156 |     }
157 | 
158 |     fn serialize_unit(self) -> Result<Tag> {
159 |         unimplemented!()
160 |     }
161 | 
162 |     fn serialize_unit_struct(self, _name: &'static str) -> Result<Tag> {
163 |         unimplemented!()
164 |     }
165 | 
166 |     fn serialize_unit_variant(
167 |         self,
168 |         _name: &'static str,
169 |         _variant_index: u32,
170 |         _variant: &'static str,
171 |     ) -> Result<Tag> {
172 |         unimplemented!()
173 |     }
174 | 
175 |     fn serialize_newtype_struct<T>(self, _name: &'static str, _value: &T) -> Result<Tag>
176 |     where
177 |         T: ?Sized + ser::Serialize,
178 |     {
179 |         unimplemented!()
180 |     }
181 | 
182 |     fn serialize_newtype_variant<T>(
183 |         self,
184 |         _name: &'static str,
185 |         _variant_index: u32,
186 |         _variant: &'static str,
187 |         _value: &T,
188 |     ) -> Result<Tag>
189 |     where
190 |         T: ?Sized + ser::Serialize,
191 |     {
192 |         unimplemented!()
193 |     }
194 | 
195 |     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
196 |         unimplemented!()
197 |     }
198 | 
199 |     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple> {
200 |         unimplemented!()
201 |     }
202 | 
203 |     fn serialize_tuple_struct(
204 |         self,
205 |         _name: &'static str,
206 |         _len: usize,
207 |     ) -> Result<Self::SerializeTupleStruct> {
208 |         unimplemented!()
209 |     }
210 | 
211 |     fn serialize_tuple_variant(
212 |         self,
213 |         _name: &'static str,
214 |         _variant_index: u32,
215 |         _variant: &'static str,
216 |         _len: usize,
217 |     ) -> Result<Self::SerializeTupleVariant> {
218 |         unimplemented!()
219 |     }
220 | 
221 |     fn serialize_struct_variant(
222 |         self,
223 |         _name: &'static str,
224 |         _variant_index: u32,
225 |         _variant: &'static str,
226 |         _len: usize,
227 |     ) -> Result<Self::SerializeStructVariant> {
228 |         unimplemented!()
229 |     }
230 | }
231 | 
232 | pub(crate) struct StructSerializer<'w> {
233 |     w: &'w mut Vec<u8>,
234 |     /// `0bxxxx0000` where `xxxx` is the `field_id` delta from the previous field.
235 |     field_id_diff_tag: u8,
236 | }
237 | 
238 | impl ser::SerializeStruct for StructSerializer<'_> {
239 |     type Ok = Tag;
240 |     type Error = Error;
241 | 
242 |     fn skip_field(&mut self, _key: &'static str) -> Result<()> {
243 |         self.field_id_diff_tag = self
244 |             .field_id_diff_tag
245 |             .checked_add(0x10)
246 |             .expect("field count overflow");
247 |         Ok(())
248 |     }
249 | 
250 |     fn serialize_field<T>(&mut self, _key: &'static str, value: &T) -> Result<()>
251 |     where
252 |         T: ?Sized + ser::Serialize,
253 |     {
254 |         // Field id & type.
255 |         let pos = self.w.len();
256 |         self.w.push(0);
257 | 
258 |         let tag = value.serialize(ValueSerializer {
259 |             w: self.w,
260 |             inline_bool: true,
261 |         })?;
262 |         self.w[pos] = self.field_id_diff_tag | tag as u8;
263 |         self.field_id_diff_tag = 0x10;
264 |         Ok(())
265 |     }
266 | 
267 |     fn end(self) -> Result<Tag> {
268 |         self.w.push(0);
269 |         Ok(Tag::Struct)
270 |     }
271 | }
272 | 
273 | pub(crate) struct MapSerializer<'w> {
274 |     w: &'w mut Vec<u8>,
275 |     type_pos: usize,
276 |     ktype: Option<Tag>,
277 |     vtype: Option<Tag>,
278 | }
279 | 
280 | impl ser::SerializeMap for MapSerializer<'_> {
281 |     type Ok = Tag;
282 |     type Error = Error;
283 | 
284 |     fn serialize_key<T>(&mut self, key: &T) -> Result<()>
285 |     where
286 |         T: ?Sized + ser::Serialize,
287 |     {
288 |         if self.ktype.is_none() {
289 |             // Reserve a type byte.
290 |             self.w.push(0);
291 |         }
292 | 
293 |         let tag = key.serialize(ValueSerializer {
294 |             w: self.w,
295 |             inline_bool: false,
296 |         })?;
297 |         let prev = *self.ktype.get_or_insert(tag);
298 |         debug_assert_eq!(prev, tag);
299 |         Ok(())
300 |     }
301 | 
302 |     fn serialize_value<T>(&mut self, value: &T) -> Result<()>
303 |     where
304 |         T: ?Sized + ser::Serialize,
305 |     {
306 |         let tag = value.serialize(ValueSerializer {
307 |             w: self.w,
308 |             inline_bool: false,
309 |         })?;
310 |         let prev = *self.vtype.get_or_insert(tag);
311 |         debug_assert_eq!(prev, tag);
312 |         Ok(())
313 |     }
314 | 
315 |     fn end(self) -> Result<Tag> {
316 |         // This condition is false if the map contains zero elements.
317 |         if self.type_pos < self.w.len() {
318 |             self.w[self.type_pos] = (self.ktype.unwrap() as u8) << 4 | self.vtype.unwrap() as u8;
319 |         }
320 |         Ok(Tag::Map)
321 |     }
322 | }
323 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/tests.rs:
--------------------------------------------------------------------------------
1 | use super::*;
2 | 
3 | #[test]
4 | #[cfg(feature = "serialize")]
5 | fn serde_schema() {
6 |     let schema = Schema {
7 |         relax_type_checks: true,
8 |         layouts: DenseMap(vec![Some(SchemaLayout {
9 |             size: 1,
10 |             bits: 0,
11 |             fields: DenseMap::default(),
12 |             type_name: String::new(),
13 |         })]),
14 |         root_layout: 0,
15 |         file_version: 1,
16 |     };
17 |     let bytes = schema.to_bytes().unwrap();
18 | 
19 |     let expect = [
20 |         // struct
21 |         0x11, // field `relax_type_checks` (id = 0 + 1 = 1), value = true
22 |         0x1b, // field `layouts` (id = 1 + 1 = 2), type = 0xb map
23 |         0x01, // map size = 1
24 |         0x4c, // key = i16, value = struct
25 |         0x00, // key i16 = 0 = zigzag(0)
26 |         /**/  // value struct
27 |         0x15, // field `size` (id = 0 + 1 = 1)
28 |         0x02, // 2 = zigzag(1)
29 |         0x2b, // field `fields` (id = 1 + 2 = 3), type = 0xb map
30 |         0x00, // len = 0
31 |         0x18, // field `type_name` (id = 3 + 1 = 4), type = 0x8 binary
32 |         0x00, // len = 0
33 |         0x00, // struct end
34 |         0x25, // field `file_version` (id = 2 + 2 = 4), type = 0x5 i32
35 |         0x02, // 2 = zigzag(1)
36 |         0x00, // struct end
37 |     ];
38 |     assert_eq!(bytes, expect);
39 | 
40 |     let got = Schema::parse(&bytes).unwrap();
41 |     assert_eq!(got, schema);
42 | }
43 | 
44 | #[test]
45 | fn de_frozen() {
46 |     #[derive(Debug, PartialEq, Eq, Deserialize)]
47 |     struct Pair {
48 |         a: u32,
49 |         #[serde(default)]
50 |         b: u32,
51 |         c: u32,
52 |     }
53 | 
54 |     let schema = Schema {
55 |         relax_type_checks: true,
56 |         layouts: DenseMap(vec![
57 |             None,
58 |             Some(SchemaLayout {
59 |                 size: 0,
60 |                 bits: 8,
61 |                 fields: DenseMap(vec![
62 |                     None,
63 |                     Some(SchemaField {
64 |                         layout_id: 2,
65 |                         offset: 0,
66 |                     }),
67 |                     None,
68 |                     Some(SchemaField {
69 |                         layout_id: 2,
70 |                         offset: -4,
71 |                     }),
72 |                 ]),
73 |                 type_name: String::new(),
74 |             }),
75 |             Some(SchemaLayout {
76 |                 size: 0,
77 |                 bits: 4,
78 |                 fields: DenseMap::default(),
79 |                 type_name: String::new(),
80 |             }),
81 |         ]),
82 |         root_layout: 1,
83 |         file_version: 1,
84 |     };
85 | 
86 |     let de = de_frozen::deserialize::<Pair>(&schema, b"\x42\0\0\0\0\0\0\0").unwrap();
87 |     assert_eq!(
88 |         de,
89 |         Pair {
90 |             a: 0x2,
91 |             b: 0,
92 |             c: 0x4
93 |         }
94 |     );
95 | }
96 | 
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 |   "nodes": {
3 |     "nixpkgs": {
4 |       "locked": {
5 |         "lastModified": 1748186667,
6 |         "narHash": "sha256-UQubDNIQ/Z42R8tPCIpY+BOhlxO8t8ZojwC9o2FW3c8=",
7 |         "owner": "NixOS",
8 |         "repo": "nixpkgs",
9 |         "rev": "bdac72d387dca7f836f6ef1fe547755fb0e9df61",
10 |         "type": "github"
11 |       },
12 |       "original": {
13 |         "owner": "NixOS",
14 |         "ref": "nixpkgs-unstable",
15 |         "repo": "nixpkgs",
16 |         "type": "github"
17 |       }
18 |     },
19 |     "root": {
20 |       "inputs": {
21 |         "nixpkgs": "nixpkgs"
22 |       }
23 |     }
24 |   },
25 |   "root": "root",
26 |   "version": 7
27 | }
28 | 
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
1 | {
2 |   inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
3 | 
4 |   outputs =
5 |     { self, nixpkgs }:
6 |     let
inherit (nixpkgs) lib; 8 | eachSystems = 9 | f: lib.genAttrs lib.systems.flakeExposed (system: f system nixpkgs.legacyPackages.${system}); 10 | in 11 | { 12 | devShells = eachSystems ( 13 | system: pkgs: rec { 14 | without-rust = pkgs.mkShell { 15 | nativeBuildInputs = [ 16 | pkgs.dwarfs 17 | pkgs.fakeroot 18 | ]; 19 | }; 20 | } 21 | ); 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /typos.toml: -------------------------------------------------------------------------------- 1 | [default.extend-words] 2 | # A workaround for keyword "type". 3 | typ = "typ" --------------------------------------------------------------------------------