├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-GPL-3.0
├── LICENSE-MIT
├── README.md
├── clippy.toml
├── dwarfs-enc
│   ├── Cargo.toml
│   ├── LICENSE-GPL-3.0
│   ├── README.md
│   ├── examples
│   │   └── mkdwarfs.rs
│   ├── src
│   │   ├── chunker.rs
│   │   ├── error.rs
│   │   ├── lib.rs
│   │   ├── metadata.rs
│   │   ├── ordered_parallel.rs
│   │   └── section.rs
│   └── tests
│       └── basic.rs
├── dwarfs-test
│   ├── Cargo.toml
│   ├── LICENSE-APACHE
│   ├── LICENSE-MIT
│   ├── src
│   │   ├── lib.rs
│   │   ├── main.rs
│   │   ├── mtree.rs
│   │   └── traverse.rs
│   └── tests
│       ├── basic.rs
│       └── large.rs
├── dwarfs
│   ├── CHANGELOG.md
│   ├── Cargo.toml
│   ├── LICENSE-APACHE
│   ├── LICENSE-MIT
│   ├── README.md
│   └── src
│       ├── archive.rs
│       ├── fsst.rs
│       ├── lib.rs
│       ├── metadata.rs
│       ├── metadata
│       │   ├── de_frozen.rs
│       │   ├── de_thrift.rs
│       │   ├── ser_frozen.rs
│       │   ├── ser_thrift.rs
│       │   └── tests.rs
│       └── section.rs
├── flake.lock
├── flake.nix
└── typos.toml
/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | result 3 | result-* 4 | *.dwarfs 5 | 6 | perf*.data* 7 | flamegraph*.svg 8 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "allocator-api2" 16 | version = "0.2.21" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" 19 | 20 | [[package]] 21 | name = "anstream" 22 | version = "0.6.18" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" 25 | dependencies = [ 26 | "anstyle", 27 | "anstyle-parse", 28 | "anstyle-query", 29 | "anstyle-wincon", 30 | "colorchoice", 31 | "is_terminal_polyfill", 32 | "utf8parse", 33 | ] 34 | 35 | [[package]] 36 | name = "anstyle" 37 | version = "1.0.10" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" 40 | 41 | [[package]] 42 | name = "anstyle-parse" 43 | version = "0.2.6" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" 46 | dependencies = [ 47 | "utf8parse", 48 | ] 49 | 50 | [[package]] 51 | name = "anstyle-query" 52 | version = "1.1.2" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" 55 | dependencies = [ 56 | "windows-sys", 57 | ] 58 | 59 | [[package]] 60 | name = "anstyle-wincon" 61 | version = "3.0.7" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" 64 | dependencies = [ 65 | "anstyle", 66 | "once_cell", 67 | "windows-sys", 68 | ] 69 | 70 | [[package]] 71 | name = "bitflags" 72 | version = "2.9.0" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 75 | 76 |
[[package]] 77 | name = "block-buffer" 78 | version = "0.10.4" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 81 | dependencies = [ 82 | "generic-array", 83 | ] 84 | 85 | [[package]] 86 | name = "bstr" 87 | version = "1.12.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" 90 | dependencies = [ 91 | "memchr", 92 | "regex-automata", 93 | "serde", 94 | ] 95 | 96 | [[package]] 97 | name = "bumpalo" 98 | version = "3.18.1" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" 101 | 102 | [[package]] 103 | name = "cc" 104 | version = "1.2.22" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "32db95edf998450acc7881c932f94cd9b05c87b4b2599e8bab064753da4acfd1" 107 | dependencies = [ 108 | "jobserver", 109 | "libc", 110 | "shlex", 111 | ] 112 | 113 | [[package]] 114 | name = "cfg-if" 115 | version = "1.0.0" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 118 | 119 | [[package]] 120 | name = "clap" 121 | version = "4.5.39" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" 124 | dependencies = [ 125 | "clap_builder", 126 | "clap_derive", 127 | ] 128 | 129 | [[package]] 130 | name = "clap_builder" 131 | version = "4.5.39" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" 134 | dependencies = [ 135 | "anstream", 136 | "anstyle", 137 | "clap_lex", 138 | "strsim", 139 | ] 140 | 141 | [[package]] 142 | name = "clap_derive" 143 | version = "4.5.32" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" 146 | dependencies = [ 147 | "heck", 148 | "proc-macro2", 149 | "quote", 150 | "syn", 151 | ] 152 | 153 | [[package]] 154 | name = "clap_lex" 155 | version = "0.7.4" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" 158 | 159 | [[package]] 160 | name = "colorchoice" 161 | version = "1.0.3" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" 164 | 165 | [[package]] 166 | name = "console" 167 | version = "0.15.11" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" 170 | dependencies = [ 171 | "encode_unicode", 172 | "libc", 173 | "once_cell", 174 | "unicode-width", 175 | "windows-sys", 176 | ] 177 | 178 | [[package]] 179 | name = "cpufeatures" 180 | version = "0.2.17" 181 | source = "registry+https://github.com/rust-lang/crates.io-index" 182 | checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" 183 | dependencies = [ 184 | "libc", 185 | ] 186 | 187 | [[package]] 188 | name = "crossbeam-channel" 189 | version = "0.5.15" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = 
"82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" 192 | dependencies = [ 193 | "crossbeam-utils", 194 | ] 195 | 196 | [[package]] 197 | name = "crossbeam-utils" 198 | version = "0.8.21" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 201 | 202 | [[package]] 203 | name = "crypto-common" 204 | version = "0.1.6" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 207 | dependencies = [ 208 | "generic-array", 209 | "typenum", 210 | ] 211 | 212 | [[package]] 213 | name = "digest" 214 | version = "0.10.7" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 217 | dependencies = [ 218 | "block-buffer", 219 | "crypto-common", 220 | ] 221 | 222 | [[package]] 223 | name = "dwarfs" 224 | version = "0.2.1" 225 | dependencies = [ 226 | "bstr", 227 | "indexmap", 228 | "liblzma", 229 | "log", 230 | "lru", 231 | "lz4", 232 | "measure_time", 233 | "positioned-io", 234 | "serde", 235 | "sha2", 236 | "xxhash-rust", 237 | "zerocopy", 238 | "zstd-safe", 239 | ] 240 | 241 | [[package]] 242 | name = "dwarfs-enc" 243 | version = "0.1.0" 244 | dependencies = [ 245 | "clap", 246 | "crossbeam-channel", 247 | "dwarfs", 248 | "indexmap", 249 | "indicatif", 250 | "liblzma", 251 | "rustic_cdc", 252 | "rustix", 253 | "serde", 254 | "sha2", 255 | "zerocopy", 256 | "zstd-safe", 257 | ] 258 | 259 | [[package]] 260 | name = "dwarfs-test" 261 | version = "0.0.0" 262 | dependencies = [ 263 | "dwarfs", 264 | "env_logger", 265 | "hex", 266 | "rustix", 267 | "sha2", 268 | "tempfile", 269 | "xshell", 270 | ] 271 | 272 | [[package]] 273 | name = "encode_unicode" 274 | version = "1.0.0" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" 277 | 278 | [[package]] 279 | name = "env_filter" 280 | version = "0.1.3" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 283 | dependencies = [ 284 | "log", 285 | "regex", 286 | ] 287 | 288 | [[package]] 289 | name = "env_logger" 290 | version = "0.11.8" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 293 | dependencies = [ 294 | "anstream", 295 | "anstyle", 296 | "env_filter", 297 | "jiff", 298 | "log", 299 | ] 300 | 301 | [[package]] 302 | name = "equivalent" 303 | version = "1.0.2" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 306 | 307 | [[package]] 308 | name = "errno" 309 | version = "0.3.12" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" 312 | dependencies = [ 313 | "libc", 314 | "windows-sys", 315 | ] 316 | 317 | [[package]] 318 | name = "fastrand" 319 | version = "2.3.0" 320 | source = "registry+https://github.com/rust-lang/crates.io-index" 321 | checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 322 | 323 | [[package]] 324 | name = "foldhash" 325 | version = "0.1.5" 326 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 327 | checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 328 | 329 | [[package]] 330 | name = "generic-array" 331 | version = "0.14.7" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" 334 | dependencies = [ 335 | "typenum", 336 | "version_check", 337 | ] 338 | 339 | [[package]] 340 | name = "getrandom" 341 | version = "0.3.3" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 344 | dependencies = [ 345 | "cfg-if", 346 | "libc", 347 | "r-efi", 348 | "wasi", 349 | ] 350 | 351 | [[package]] 352 | name = "hashbrown" 353 | version = "0.15.3" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" 356 | dependencies = [ 357 | "allocator-api2", 358 | "equivalent", 359 | "foldhash", 360 | ] 361 | 362 | [[package]] 363 | name = "heck" 364 | version = "0.5.0" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 367 | 368 | [[package]] 369 | name = "hex" 370 | version = "0.4.3" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" 373 | 374 | [[package]] 375 | name = "indexmap" 376 | version = "2.9.0" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" 379 | dependencies = [ 380 | "equivalent", 381 | "hashbrown", 382 | ] 383 | 384 | [[package]] 385 | name = "indicatif" 386 | version = "0.17.11" 387 | source = "registry+https://github.com/rust-lang/crates.io-index" 388 | checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" 389 | dependencies = [ 390 | "console", 391 | "number_prefix", 392 | "portable-atomic", 393 | "unicode-width", 394 | "web-time", 395 | ] 396 | 397 | [[package]] 398 | name = "is_terminal_polyfill" 399 | version = "1.70.1" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 402 | 403 | [[package]] 404 | name = "jiff" 405 | version = "0.2.13" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "f02000660d30638906021176af16b17498bd0d12813dbfe7b276d8bc7f3c0806" 408 | dependencies = [ 409 | "jiff-static", 410 | "log", 411 | "portable-atomic", 412 | "portable-atomic-util", 413 | "serde", 414 | ] 415 | 416 | [[package]] 417 | name = "jiff-static" 418 | version = "0.2.13" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "f3c30758ddd7188629c6713fc45d1188af4f44c90582311d0c8d8c9907f60c48" 421 | dependencies = [ 422 | "proc-macro2", 423 | "quote", 424 | "syn", 425 | ] 426 | 427 | [[package]] 428 | name = "jobserver" 429 | version = "0.1.33" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" 432 | dependencies = [ 433 | "getrandom", 434 | "libc", 435 | ] 436 | 437 | [[package]] 438 | name = "js-sys" 439 | version = "0.3.77" 440 | source = "registry+https://github.com/rust-lang/crates.io-index" 441 | checksum = 
"1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" 442 | dependencies = [ 443 | "once_cell", 444 | "wasm-bindgen", 445 | ] 446 | 447 | [[package]] 448 | name = "libc" 449 | version = "0.2.172" 450 | source = "registry+https://github.com/rust-lang/crates.io-index" 451 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 452 | 453 | [[package]] 454 | name = "liblzma" 455 | version = "0.4.1" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "66352d7a8ac12d4877b6e6ea5a9b7650ee094257dc40889955bea5bc5b08c1d0" 458 | dependencies = [ 459 | "liblzma-sys", 460 | ] 461 | 462 | [[package]] 463 | name = "liblzma-sys" 464 | version = "0.4.3" 465 | source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "5839bad90c3cc2e0b8c4ed8296b80e86040240f81d46b9c0e9bc8dd51ddd3af1" 467 | dependencies = [ 468 | "cc", 469 | "libc", 470 | "pkg-config", 471 | ] 472 | 473 | [[package]] 474 | name = "linux-raw-sys" 475 | version = "0.9.4" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" 478 | 479 | [[package]] 480 | name = "log" 481 | version = "0.4.27" 482 | source = "registry+https://github.com/rust-lang/crates.io-index" 483 | checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" 484 | 485 | [[package]] 486 | name = "lru" 487 | version = "0.14.0" 488 | source = "registry+https://github.com/rust-lang/crates.io-index" 489 | checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" 490 | dependencies = [ 491 | "hashbrown", 492 | ] 493 | 494 | [[package]] 495 | name = "lz4" 496 | version = "1.28.1" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" 499 | dependencies = [ 500 | "lz4-sys", 501 | ] 502 | 503 | [[package]] 504 | name = "lz4-sys" 505 | version = "1.11.1+lz4-1.10.0" 506 | source = "registry+https://github.com/rust-lang/crates.io-index" 507 | checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" 508 | dependencies = [ 509 | "cc", 510 | "libc", 511 | ] 512 | 513 | [[package]] 514 | name = "measure_time" 515 | version = "0.9.0" 516 | source = "registry+https://github.com/rust-lang/crates.io-index" 517 | checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" 518 | dependencies = [ 519 | "log", 520 | ] 521 | 522 | [[package]] 523 | name = "memchr" 524 | version = "2.7.4" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 527 | 528 | [[package]] 529 | name = "number_prefix" 530 | version = "0.4.0" 531 | source = "registry+https://github.com/rust-lang/crates.io-index" 532 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 533 | 534 | [[package]] 535 | name = "once_cell" 536 | version = "1.21.3" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 539 | 540 | [[package]] 541 | name = "pkg-config" 542 | version = "0.3.32" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 545 | 546 | [[package]] 547 | name = "portable-atomic" 548 | version = "1.11.0" 549 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" 551 | 552 | [[package]] 553 | name = "portable-atomic-util" 554 | version = "0.2.4" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 557 | dependencies = [ 558 | "portable-atomic", 559 | ] 560 | 561 | [[package]] 562 | name = "positioned-io" 563 | version = "0.3.4" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "e8078ce4d22da5e8f57324d985cc9befe40c49ab0507a192d6be9e59584495c9" 566 | dependencies = [ 567 | "libc", 568 | "winapi", 569 | ] 570 | 571 | [[package]] 572 | name = "proc-macro2" 573 | version = "1.0.95" 574 | source = "registry+https://github.com/rust-lang/crates.io-index" 575 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 576 | dependencies = [ 577 | "unicode-ident", 578 | ] 579 | 580 | [[package]] 581 | name = "quote" 582 | version = "1.0.40" 583 | source = "registry+https://github.com/rust-lang/crates.io-index" 584 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 585 | dependencies = [ 586 | "proc-macro2", 587 | ] 588 | 589 | [[package]] 590 | name = "r-efi" 591 | version = "5.2.0" 592 | source = "registry+https://github.com/rust-lang/crates.io-index" 593 | checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" 594 | 595 | [[package]] 596 | name = "regex" 597 | version = "1.11.1" 598 | source = "registry+https://github.com/rust-lang/crates.io-index" 599 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 600 | dependencies = [ 601 | "aho-corasick", 602 | "memchr", 603 | "regex-automata", 604 | "regex-syntax", 605 | ] 606 | 607 | [[package]] 608 | name = "regex-automata" 609 | version = "0.4.9" 610 | source = "registry+https://github.com/rust-lang/crates.io-index" 611 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 612 | dependencies = [ 613 | "aho-corasick", 614 | "memchr", 615 | "regex-syntax", 616 | ] 617 | 618 | [[package]] 619 | name = "regex-syntax" 620 | version = "0.8.5" 621 | source = "registry+https://github.com/rust-lang/crates.io-index" 622 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 623 | 624 | [[package]] 625 | name = "rustic_cdc" 626 | version = "0.3.1" 627 | source = "registry+https://github.com/rust-lang/crates.io-index" 628 | checksum = "fbcebf2228827bc4b61cb54dfd84cf43aacf06ca2dfe4c014b136a0e32b876e2" 629 | 630 | [[package]] 631 | name = "rustix" 632 | version = "1.0.7" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" 635 | dependencies = [ 636 | "bitflags", 637 | "errno", 638 | "libc", 639 | "linux-raw-sys", 640 | "windows-sys", 641 | ] 642 | 643 | [[package]] 644 | name = "serde" 645 | version = "1.0.219" 646 | source = "registry+https://github.com/rust-lang/crates.io-index" 647 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 648 | dependencies = [ 649 | "serde_derive", 650 | ] 651 | 652 | [[package]] 653 | name = "serde_derive" 654 | version = "1.0.219" 655 | source = "registry+https://github.com/rust-lang/crates.io-index" 656 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 657 | dependencies = [ 658 | "proc-macro2", 659 | 
"quote", 660 | "syn", 661 | ] 662 | 663 | [[package]] 664 | name = "sha2" 665 | version = "0.10.9" 666 | source = "registry+https://github.com/rust-lang/crates.io-index" 667 | checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" 668 | dependencies = [ 669 | "cfg-if", 670 | "cpufeatures", 671 | "digest", 672 | ] 673 | 674 | [[package]] 675 | name = "shlex" 676 | version = "1.3.0" 677 | source = "registry+https://github.com/rust-lang/crates.io-index" 678 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 679 | 680 | [[package]] 681 | name = "strsim" 682 | version = "0.11.1" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 685 | 686 | [[package]] 687 | name = "syn" 688 | version = "2.0.101" 689 | source = "registry+https://github.com/rust-lang/crates.io-index" 690 | checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 691 | dependencies = [ 692 | "proc-macro2", 693 | "quote", 694 | "unicode-ident", 695 | ] 696 | 697 | [[package]] 698 | name = "tempfile" 699 | version = "3.20.0" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" 702 | dependencies = [ 703 | "fastrand", 704 | "getrandom", 705 | "once_cell", 706 | "rustix", 707 | "windows-sys", 708 | ] 709 | 710 | [[package]] 711 | name = "typenum" 712 | version = "1.18.0" 713 | source = "registry+https://github.com/rust-lang/crates.io-index" 714 | checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" 715 | 716 | [[package]] 717 | name = "unicode-ident" 718 | version = "1.0.18" 719 | source = "registry+https://github.com/rust-lang/crates.io-index" 720 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 721 | 722 | [[package]] 723 | name = "unicode-width" 724 | version = "0.2.0" 725 | source = "registry+https://github.com/rust-lang/crates.io-index" 726 | checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" 727 | 728 | [[package]] 729 | name = "utf8parse" 730 | version = "0.2.2" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 733 | 734 | [[package]] 735 | name = "version_check" 736 | version = "0.9.5" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 739 | 740 | [[package]] 741 | name = "wasi" 742 | version = "0.14.2+wasi-0.2.4" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" 745 | dependencies = [ 746 | "wit-bindgen-rt", 747 | ] 748 | 749 | [[package]] 750 | name = "wasm-bindgen" 751 | version = "0.2.100" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" 754 | dependencies = [ 755 | "cfg-if", 756 | "once_cell", 757 | "wasm-bindgen-macro", 758 | ] 759 | 760 | [[package]] 761 | name = "wasm-bindgen-backend" 762 | version = "0.2.100" 763 | source = "registry+https://github.com/rust-lang/crates.io-index" 764 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" 765 | dependencies = [ 766 | "bumpalo", 767 | "log", 768 | "proc-macro2", 769 | "quote", 
770 | "syn", 771 | "wasm-bindgen-shared", 772 | ] 773 | 774 | [[package]] 775 | name = "wasm-bindgen-macro" 776 | version = "0.2.100" 777 | source = "registry+https://github.com/rust-lang/crates.io-index" 778 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 779 | dependencies = [ 780 | "quote", 781 | "wasm-bindgen-macro-support", 782 | ] 783 | 784 | [[package]] 785 | name = "wasm-bindgen-macro-support" 786 | version = "0.2.100" 787 | source = "registry+https://github.com/rust-lang/crates.io-index" 788 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" 789 | dependencies = [ 790 | "proc-macro2", 791 | "quote", 792 | "syn", 793 | "wasm-bindgen-backend", 794 | "wasm-bindgen-shared", 795 | ] 796 | 797 | [[package]] 798 | name = "wasm-bindgen-shared" 799 | version = "0.2.100" 800 | source = "registry+https://github.com/rust-lang/crates.io-index" 801 | checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 802 | dependencies = [ 803 | "unicode-ident", 804 | ] 805 | 806 | [[package]] 807 | name = "web-time" 808 | version = "1.1.0" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" 811 | dependencies = [ 812 | "js-sys", 813 | "wasm-bindgen", 814 | ] 815 | 816 | [[package]] 817 | name = "winapi" 818 | version = "0.3.9" 819 | source = "registry+https://github.com/rust-lang/crates.io-index" 820 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 821 | dependencies = [ 822 | "winapi-i686-pc-windows-gnu", 823 | "winapi-x86_64-pc-windows-gnu", 824 | ] 825 | 826 | [[package]] 827 | name = "winapi-i686-pc-windows-gnu" 828 | version = "0.4.0" 829 | source = "registry+https://github.com/rust-lang/crates.io-index" 830 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 831 | 832 | [[package]] 833 | name = "winapi-x86_64-pc-windows-gnu" 834 | version = "0.4.0" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 837 | 838 | [[package]] 839 | name = "windows-sys" 840 | version = "0.59.0" 841 | source = "registry+https://github.com/rust-lang/crates.io-index" 842 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 843 | dependencies = [ 844 | "windows-targets", 845 | ] 846 | 847 | [[package]] 848 | name = "windows-targets" 849 | version = "0.52.6" 850 | source = "registry+https://github.com/rust-lang/crates.io-index" 851 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 852 | dependencies = [ 853 | "windows_aarch64_gnullvm", 854 | "windows_aarch64_msvc", 855 | "windows_i686_gnu", 856 | "windows_i686_gnullvm", 857 | "windows_i686_msvc", 858 | "windows_x86_64_gnu", 859 | "windows_x86_64_gnullvm", 860 | "windows_x86_64_msvc", 861 | ] 862 | 863 | [[package]] 864 | name = "windows_aarch64_gnullvm" 865 | version = "0.52.6" 866 | source = "registry+https://github.com/rust-lang/crates.io-index" 867 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 868 | 869 | [[package]] 870 | name = "windows_aarch64_msvc" 871 | version = "0.52.6" 872 | source = "registry+https://github.com/rust-lang/crates.io-index" 873 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 874 | 875 | [[package]] 876 | name = "windows_i686_gnu" 877 | version = "0.52.6" 878 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 879 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 880 | 881 | [[package]] 882 | name = "windows_i686_gnullvm" 883 | version = "0.52.6" 884 | source = "registry+https://github.com/rust-lang/crates.io-index" 885 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 886 | 887 | [[package]] 888 | name = "windows_i686_msvc" 889 | version = "0.52.6" 890 | source = "registry+https://github.com/rust-lang/crates.io-index" 891 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 892 | 893 | [[package]] 894 | name = "windows_x86_64_gnu" 895 | version = "0.52.6" 896 | source = "registry+https://github.com/rust-lang/crates.io-index" 897 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 898 | 899 | [[package]] 900 | name = "windows_x86_64_gnullvm" 901 | version = "0.52.6" 902 | source = "registry+https://github.com/rust-lang/crates.io-index" 903 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 904 | 905 | [[package]] 906 | name = "windows_x86_64_msvc" 907 | version = "0.52.6" 908 | source = "registry+https://github.com/rust-lang/crates.io-index" 909 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 910 | 911 | [[package]] 912 | name = "wit-bindgen-rt" 913 | version = "0.39.0" 914 | source = "registry+https://github.com/rust-lang/crates.io-index" 915 | checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" 916 | dependencies = [ 917 | "bitflags", 918 | ] 919 | 920 | [[package]] 921 | name = "xshell" 922 | version = "0.2.7" 923 | source = "registry+https://github.com/rust-lang/crates.io-index" 924 | checksum = "9e7290c623014758632efe00737145b6867b66292c42167f2ec381eb566a373d" 925 | dependencies = [ 926 | "xshell-macros", 927 | ] 928 | 929 | [[package]] 930 | name = "xshell-macros" 931 | version = "0.2.7" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "32ac00cd3f8ec9c1d33fb3e7958a82df6989c42d747bd326c822b1d625283547" 934 | 935 | [[package]] 936 | name = "xxhash-rust" 937 | version = "0.8.15" 938 | source = "registry+https://github.com/rust-lang/crates.io-index" 939 | checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" 940 | 941 | [[package]] 942 | name = "zerocopy" 943 | version = "0.8.25" 944 | source = "registry+https://github.com/rust-lang/crates.io-index" 945 | checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" 946 | dependencies = [ 947 | "zerocopy-derive", 948 | ] 949 | 950 | [[package]] 951 | name = "zerocopy-derive" 952 | version = "0.8.25" 953 | source = "registry+https://github.com/rust-lang/crates.io-index" 954 | checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" 955 | dependencies = [ 956 | "proc-macro2", 957 | "quote", 958 | "syn", 959 | ] 960 | 961 | [[package]] 962 | name = "zstd-safe" 963 | version = "7.2.4" 964 | source = "registry+https://github.com/rust-lang/crates.io-index" 965 | checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" 966 | dependencies = [ 967 | "zstd-sys", 968 | ] 969 | 970 | [[package]] 971 | name = "zstd-sys" 972 | version = "2.0.15+zstd.1.5.7" 973 | source = "registry+https://github.com/rust-lang/crates.io-index" 974 | checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" 975 | dependencies = [ 976 | "cc", 977 | "pkg-config", 978 | ] 979 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "3" 3 | members = [ 4 | "dwarfs", 5 | "dwarfs-test", 6 | "dwarfs-enc", 7 | ] 8 | 9 | [profile.bench] 10 | debug = "line-tables-only" 11 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dwarfs 2 | 3 | [![crates.io](https://img.shields.io/crates/v/dwarfs?label=dwarfs)](https://crates.io/crates/dwarfs) 4 | [![docs.rs](https://img.shields.io/docsrs/dwarfs?label=docs.rs%2Fdwarfs)](https://docs.rs/dwarfs) 5 | [![crates.io](https://img.shields.io/crates/v/dwarfs-enc?label=dwarfs-enc)](https://crates.io/crates/dwarfs-enc) 6 | [![docs.rs](https://img.shields.io/docsrs/dwarfs-enc?label=docs.rs%2Fdwarfs-enc)](https://docs.rs/dwarfs-enc) 7 | 8 | Libraries for reading and writing [DwarFS][dwarfs] archives (aka. DwarFS images), 9 | in pure Rust without `unsafe`. 10 | 11 | #### License 12 | 13 | TL;DR: We mostly follow [upstream][dwarfs]: the package for constructing 14 | DwarFS archives (dwarfs-enc) is GPL-3.0. Other code is "(MIT OR Apache-2.0)". 15 | 16 | Long version: 17 | 18 | All files under directory `dwarfs-enc` are licensed under GNU General Public 19 | License, version 3. Check `./dwarfs-enc/README.md` and `./LICENSE-GPL-3.0` for 20 | details. 21 | 22 | Other files in this repository outside `dwarfs-enc`, including `dwarfs` and 23 | `dwarfs-test` packages, are licensed under Apache 24 | License 2.0 or MIT license at your option. Check `./dwarfs/README.md`, 25 | `./LICENSE-APACHE` and `./LICENSE-MIT` for details. 26 | 27 | [dwarfs]: https://github.com/mhx/dwarfs 28 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | doc-valid-idents = ["DwarFS", ".."] -------------------------------------------------------------------------------- /dwarfs-enc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dwarfs-enc" 3 | version = "0.1.0" 4 | edition = "2024" 5 | license = "GPL-3.0-only" 6 | description = "A library for writing DwarFS archives (aka. 
DwarFS images)" 7 | keywords = ["dwarfs", "archive", "compression"] 8 | categories = ["compression", "filesystem"] 9 | repository = "https://github.com/oxalica/dwarfs-rs" 10 | 11 | [features] 12 | default = ["zstd"] 13 | 14 | zstd = ["dep:zstd-safe"] 15 | lzma = ["dep:liblzma"] 16 | 17 | [dependencies] 18 | crossbeam-channel = "0.5.15" 19 | dwarfs = { version = "0.2.1", path = "../dwarfs", features = ["serialize"] } 20 | indexmap = "2.9.0" 21 | liblzma = { version = "0.4.1", optional = true } 22 | rustic_cdc = "0.3.1" 23 | rustix = { version = "1.0.7", features = ["fs"] } 24 | serde = "1.0.219" 25 | sha2 = "0.10.9" 26 | zerocopy = { version = "0.8.25", features = ["derive", "std"] } 27 | zstd-safe = { version = "7.2.4", default-features = false, optional = true } 28 | 29 | [dev-dependencies] 30 | clap = { version = "4.5.39", features = ["derive"] } 31 | indicatif = "0.17.11" 32 | 33 | [[example]] 34 | name = "mkdwarfs" 35 | required-features = ["zstd", "lzma"] 36 | 37 | [lints.clippy] 38 | dbg-macro = "warn" 39 | todo = "warn" 40 | print-stdout = "warn" 41 | print-stderr = "warn" -------------------------------------------------------------------------------- /dwarfs-enc/LICENSE-GPL-3.0: -------------------------------------------------------------------------------- 1 | ../LICENSE-GPL-3.0 -------------------------------------------------------------------------------- /dwarfs-enc/README.md: -------------------------------------------------------------------------------- 1 | # dwarfs-enc 2 | 3 | [![crates.io](https://img.shields.io/crates/v/dwarfs-enc)](https://crates.io/crates/dwarfs-enc) 4 | [![docs.rs](https://img.shields.io/docsrs/dwarfs-enc)](https://docs.rs/dwarfs-enc) 5 | 6 | A library for writing [DwarFS][dwarfs] archives (aka. DwarFS images), 7 | building on top of [`dwarfs` crate][dwarfs-rs]. 8 | 9 | [dwarfs]: https://github.com/mhx/dwarfs 10 | [dwarfs-rs]: https://crates.io/crates/dwarfs 11 | 12 | #### License 13 | 14 | SPDX-License-Identifier: GPL-3.0-only 15 | 16 | Copyright (C) 2025 Oxalica 17 | 18 | This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 3. 19 | 20 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 21 | 22 | You should have received a copy of the GNU General Public License along with this program. If not, see . 
23 | -------------------------------------------------------------------------------- /dwarfs-enc/examples/mkdwarfs.rs: -------------------------------------------------------------------------------- 1 | #![expect(clippy::print_stderr, reason = "allowed in examples")] 2 | use std::{ 3 | borrow::Cow, 4 | fs, 5 | path::{Path, PathBuf}, 6 | time::Instant, 7 | }; 8 | 9 | use dwarfs_enc::{ 10 | chunker::{self, Chunker}, 11 | metadata::{Builder as MetadataBuilder, InodeMetadata}, 12 | section::{self, CompressParam}, 13 | }; 14 | use indicatif::{HumanBytes, HumanCount, MultiProgress, ProgressBar, ProgressStyle}; 15 | 16 | #[derive(Debug, clap::Parser)] 17 | struct Cli { 18 | #[arg(short, long)] 19 | input: PathBuf, 20 | #[arg(short, long)] 21 | output: PathBuf, 22 | 23 | #[arg(short, long)] 24 | force: bool, 25 | 26 | #[arg(long, conflicts_with = "lzma")] 27 | zstd: Option<i32>, 28 | #[arg(long)] 29 | lzma: Option<u32>, 30 | } 31 | 32 | fn main() -> Result<(), Box<dyn std::error::Error>> { 33 | let cli: Cli = clap::Parser::parse(); 34 | 35 | let inst = Instant::now(); 36 | 37 | let fout = fs::OpenOptions::new() 38 | .write(true) 39 | .create(true) 40 | .truncate(true) 41 | .create_new(!cli.force) 42 | .open(&cli.output)?; 43 | 44 | let root_meta = fs::metadata(&cli.input)?; 45 | let root_meta = InodeMetadata::from(&root_meta); 46 | 47 | let stat = { 48 | let progress = ProgressBar::new_spinner(); 49 | let mut stat = Stats::default(); 50 | traverse_stats(&cli.input, &mut stat, &progress)?; 51 | progress.finish(); 52 | stat 53 | }; 54 | 55 | let compress = match (cli.zstd, cli.lzma) { 56 | (None, None) => CompressParam::None, 57 | (Some(zstd), None) => CompressParam::Zstd(zstd), 58 | (None, Some(lzma)) => CompressParam::Lzma(lzma), 59 | _ => unreachable!(), 60 | }; 61 | eprintln!("using compression: {compress:?}"); 62 | 63 | let pb_in_bytes = ProgressBar::new(stat.total_bytes).with_style( 64 | ProgressStyle::with_template( 65 | "input : {binary_bytes}/{binary_total_bytes} ({binary_bytes_per_sec}) {wide_bar}", 66 | ) 67 | .unwrap(), 68 | ); 69 | let pb_out_bytes = ProgressBar::no_length() 70 | .with_style(ProgressStyle::with_template("output: {binary_bytes} {spinner}").unwrap()); 71 | let fout_pb = pb_out_bytes.wrap_write(&fout); 72 | 73 | let pbs = MultiProgress::new(); 74 | pbs.add(pb_in_bytes.clone()); 75 | pbs.add(pb_out_bytes.clone()); 76 | 77 | // Make bars visible now, or there would be a delay on the second bar, 78 | // because block compression takes quite some time to finish.
79 | pb_in_bytes.tick(); 80 | pb_out_bytes.tick(); 81 | 82 | let mut builder = MetadataBuilder::new(&root_meta); 83 | let writer = section::Writer::new(fout_pb)?; 84 | let chunker = chunker::BasicChunker::new(writer, builder.block_size(), compress); 85 | let mut chunker = chunker::CdcChunker::new(chunker); 86 | 87 | build_archive(&mut builder, &mut chunker, &cli.input, &pb_in_bytes)?; 88 | 89 | pb_in_bytes.finish(); 90 | pbs.println(format!( 91 | "deduplicated {}", 92 | HumanBytes(chunker.deduplicated_bytes()), 93 | ))?; 94 | 95 | pbs.println("finalizing metadata")?; 96 | let mut w = chunker.finish()?; 97 | w.write_metadata_sections(&builder.finish()?, compress)?; 98 | 99 | pbs.println("waiting for compression to finish")?; 100 | w.finish()?; 101 | pb_out_bytes.finish(); 102 | 103 | let output_len = fout.metadata()?.len(); 104 | 105 | let elapsed = inst.elapsed(); 106 | eprintln!( 107 | "completed in {:?}, with compression ratio {:.2}%", 108 | elapsed, 109 | output_len as f32 / stat.total_bytes as f32 * 100.0, 110 | ); 111 | 112 | Ok(()) 113 | } 114 | 115 | #[derive(Debug, Default)] 116 | struct Stats { 117 | files: u64, 118 | total_bytes: u64, 119 | } 120 | 121 | fn traverse_stats( 122 | root_path: &Path, 123 | stat: &mut Stats, 124 | progress: &ProgressBar, 125 | ) -> std::io::Result<()> { 126 | for ent in fs::read_dir(root_path)? { 127 | let ent = ent?; 128 | let ft = ent.file_type()?; 129 | if ft.is_dir() { 130 | traverse_stats(&ent.path(), stat, progress)?; 131 | } else if ft.is_file() { 132 | stat.files += 1; 133 | stat.total_bytes += fs::symlink_metadata(ent.path())?.len(); 134 | 135 | if stat.files % 1024 == 0 { 136 | progress.set_message(format!( 137 | "found {} files, total {}", 138 | HumanCount(stat.files), 139 | HumanBytes(stat.total_bytes), 140 | )); 141 | } 142 | } 143 | } 144 | Ok(()) 145 | } 146 | 147 | fn build_archive( 148 | meta_builder: &mut MetadataBuilder, 149 | chunker: &mut dyn Chunker, 150 | root_path: &Path, 151 | pb_in_bytes: &ProgressBar, 152 | ) -> dwarfs_enc::Result<()> { 153 | let mut stack = Vec::new(); 154 | stack.push(( 155 | meta_builder.root(), 156 | root_path.to_owned(), 157 | fs::read_dir(root_path)?, 158 | )); 159 | 160 | while let Some(&mut (dir, ref dir_path, ref mut iter)) = stack.last_mut() { 161 | let Some(ent) = iter.next().transpose()? 
else { 162 | stack.pop(); 163 | continue; 164 | }; 165 | 166 | let name = ent.file_name(); 167 | let name_str = name.to_string_lossy(); 168 | if matches!(name_str, Cow::Owned(_)) { 169 | eprintln!("normalized non-UTF-8 name: {name:?} -> {name_str:?}"); 170 | } 171 | let subpath = dir_path.join(&name); 172 | 173 | let ft = ent.file_type()?; 174 | let os_meta = ent.metadata()?; 175 | let inode_meta = InodeMetadata::from(&os_meta); 176 | 177 | if ft.is_dir() { 178 | let subdir = meta_builder.put_dir(dir, &name_str, &inode_meta)?; 179 | let subiter = fs::read_dir(&subpath)?; 180 | stack.push((subdir, subpath, subiter)); 181 | } else if ft.is_file() { 182 | let os_file = fs::File::open(&subpath)?; 183 | let chunks = chunker.put_reader(&mut pb_in_bytes.wrap_read(os_file))?; 184 | meta_builder.put_file(dir, &name_str, &inode_meta, chunks)?; 185 | } else if ft.is_symlink() { 186 | let target = fs::read_link(&subpath)?; 187 | let target_str = target.to_string_lossy(); 188 | if matches!(target_str, Cow::Owned(_)) { 189 | eprintln!("normalized non-UTF-8 symlink target: {target:?} -> {target_str:?}"); 190 | } 191 | meta_builder.put_symlink(dir, &name_str, &inode_meta, &target_str)?; 192 | } else { 193 | eprintln!( 194 | "ignore unsupported file type {:?} for path: {}", 195 | ft, 196 | subpath.display(), 197 | ); 198 | } 199 | } 200 | Ok(()) 201 | } 202 | -------------------------------------------------------------------------------- /dwarfs-enc/src/chunker.rs: -------------------------------------------------------------------------------- 1 | //! File data slicing and/or deduplication. 2 | use std::{ 3 | collections::{HashMap, hash_map::Entry}, 4 | fmt, 5 | io::{Read, Write}, 6 | num::NonZero, 7 | }; 8 | 9 | use dwarfs::section::SectionType; 10 | use rustic_cdc::{Rabin64, RollingHash64}; 11 | use sha2::{Digest, Sha512_256}; 12 | 13 | use crate::{ 14 | Error, Result, 15 | metadata::Chunk, 16 | section::{self, CompressParam}, 17 | }; 18 | 19 | type Chunks = Vec<Chunk>; 20 | 21 | /// Algorithm to slice and/or deduplicate file content. 22 | pub trait Chunker { 23 | /// Put data via a [`Read`] instance into the archive, and return the 24 | /// chunking result ready for [`crate::metadata::Builder::put_file`]. 25 | fn put_reader(&mut self, rdr: &mut dyn Read) -> Result<Chunks>; 26 | 27 | /// Put in-memory data into the archive. 28 | /// 29 | /// This is a shortcut to [`Chunker::put_reader`]. 30 | fn put_bytes(&mut self, mut bytes: &[u8]) -> Result<Chunks> { 31 | self.put_reader(&mut bytes) 32 | } 33 | } 34 | 35 | /// The simplest chunker, which concatenates all files and slices data at block size. 36 | /// 37 | /// This does no deduplication. 38 | pub struct BasicChunker<W> { 39 | buf: Box<[u8]>, 40 | buf_len: usize, 41 | compression: CompressParam, 42 | w: section::Writer<W>, 43 | } 44 | 45 | impl<W> fmt::Debug for BasicChunker<W> { 46 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 47 | f.debug_struct("BasicChunker") 48 | .field("buf", &format_args!("{}/{}", self.buf_len, self.buf.len())) 49 | .field("compression", &self.compression) 50 | .field("w", &self.w) 51 | .finish() 52 | } 53 | } 54 | 55 | impl<W> BasicChunker<W> { 56 | /// Create a basic chunker with given section writer and parameters. 57 | /// 58 | /// Note: `block_size` must match the block size configured for 59 | /// [`crate::metadata::Builder`]. You should always get it from 60 | /// [`crate::metadata::Builder::block_size`].
61 | pub fn new( 62 | w: section::Writer<W>, 63 | block_size: NonZero<u32>, 64 | compression: CompressParam, 65 | ) -> Self { 66 | Self { 67 | buf: vec![0u8; block_size.get() as usize].into_boxed_slice(), 68 | buf_len: 0, 69 | compression, 70 | w, 71 | } 72 | } 73 | 74 | /// Finalize data chunks and get back the underlying section writer. 75 | pub fn finish(mut self) -> Result<section::Writer<W>> 76 | where 77 | W: Write, 78 | { 79 | if self.buf_len != 0 { 80 | self.w.write_section( 81 | SectionType::BLOCK, 82 | self.compression, 83 | &self.buf[..self.buf_len], 84 | )?; 85 | self.buf_len = 0; 86 | } 87 | Ok(self.w) 88 | } 89 | 90 | fn put_reader_inner(&mut self, rdr: &mut dyn Read) -> Result<SeqChunks> 91 | where 92 | W: Write, 93 | { 94 | let mut chunks = SeqChunks { 95 | start_section_idx: self.w.section_count(), 96 | start_offset: self.buf_len as u32, 97 | len: 0, 98 | }; 99 | loop { 100 | while self.buf_len < self.buf.len() { 101 | match rdr.read(&mut self.buf[self.buf_len..]) { 102 | Ok(0) => return Ok(chunks), 103 | Ok(n) => { 104 | self.buf_len += n; 105 | chunks.len += n as u64; 106 | } 107 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue, 108 | Err(err) => return Err(err.into()), 109 | } 110 | } 111 | 112 | debug_assert_eq!(self.buf_len, self.buf.len()); 113 | self.w 114 | .write_section(SectionType::BLOCK, self.compression, &self.buf)?; 115 | self.buf_len = 0; 116 | } 117 | } 118 | } 119 | 120 | #[derive(Debug, Clone, Copy)] 121 | struct SeqChunks { 122 | start_section_idx: u32, 123 | start_offset: u32, 124 | len: u64, 125 | } 126 | 127 | impl SeqChunks { 128 | fn to_chunks(mut self, block_size: u32) -> impl Iterator<Item = Chunk> { 129 | std::iter::from_fn(move || { 130 | let rest_len = block_size - self.start_offset; 131 | if self.len == 0 { 132 | None 133 | } else if self.len <= u64::from(rest_len) { 134 | let c = Chunk { 135 | section_idx: self.start_section_idx, 136 | offset: self.start_offset, 137 | size: self.len as u32, 138 | }; 139 | self.len = 0; 140 | Some(c) 141 | } else { 142 | let c = Chunk { 143 | section_idx: self.start_section_idx, 144 | offset: self.start_offset, 145 | size: rest_len, 146 | }; 147 | self.len -= u64::from(rest_len); 148 | self.start_section_idx += 1; 149 | self.start_offset = 0; 150 | Some(c) 151 | } 152 | }) 153 | } 154 | } 155 | 156 | impl<W: Write> Chunker for BasicChunker<W> { 157 | fn put_reader(&mut self, rdr: &mut dyn Read) -> Result<Chunks> { 158 | let seq = self.put_reader_inner(rdr)?; 159 | Ok(seq.to_chunks(self.buf.len() as u32).collect()) 160 | } 161 | } 162 | 163 | /// The deduplicating chunker using Content Defined Chunking (CDC). 164 | /// 165 | /// The exact algorithm used may change. Currently it uses [rustic_cdc]. 166 | pub struct CdcChunker<W> { 167 | inner: BasicChunker<W>, 168 | // TODO: This struct is too large.
169 | rabin: Rabin64, 170 | chunk_buf: Box<[u8]>, 171 | 172 | table: HashMap<u64, CdcChunk>, 173 | deduplicated_bytes: u64, 174 | } 175 | 176 | impl<W: fmt::Debug> fmt::Debug for CdcChunker<W> { 177 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 178 | f.debug_struct("CdcChunker") 179 | .field("inner", &self.inner) 180 | .field("table_size", &self.table.len()) 181 | .field("deduplicated_bytes", &self.deduplicated_bytes) 182 | .finish_non_exhaustive() 183 | } 184 | } 185 | 186 | struct CdcChunk { 187 | sha256_suffix: [u8; 24], 188 | start_section_idx: u32, 189 | start_offset: u32, 190 | } 191 | 192 | impl<W> CdcChunker<W> { 193 | const WINDOW_SIZE_BITS: u32 = 6; 194 | const WINDOW_SIZE: usize = 1usize << Self::WINDOW_SIZE_BITS; 195 | const CUT_MASK: u64 = (1u64 << 11) - 1; 196 | const MIN_CHUNK_SIZE: usize = Self::WINDOW_SIZE; 197 | const MAX_CHUNK_SIZE: usize = 64 << 10; 198 | 199 | /// Create the deduplicating chunker on top of a [`BasicChunker`]. 200 | pub fn new(inner: BasicChunker<W>) -> Self { 201 | let rabin = Rabin64::new(Self::WINDOW_SIZE_BITS); 202 | CdcChunker { 203 | inner, 204 | rabin, 205 | chunk_buf: vec![0u8; Self::MAX_CHUNK_SIZE].into_boxed_slice(), 206 | table: HashMap::new(), 207 | deduplicated_bytes: 0, 208 | } 209 | } 210 | 211 | /// Get the total deduplicated bytes. 212 | pub fn deduplicated_bytes(&self) -> u64 { 213 | self.deduplicated_bytes 214 | } 215 | 216 | /// Finalize data chunks and get back the underlying section writer. 217 | pub fn finish(self) -> Result<section::Writer<W>> 218 | where 219 | W: Write, 220 | { 221 | self.inner.finish() 222 | } 223 | } 224 | 225 | impl<W: Write> Chunker for CdcChunker<W> { 226 | fn put_reader(&mut self, rdr: &mut dyn Read) -> Result<Chunks> { 227 | let block_size = self.inner.buf.len() as u32; 228 | 229 | let mut chunks = Chunks::new(); 230 | let mut record_chunk = |cdchunk: &[u8]| { 231 | debug_assert_ne!(cdchunk.len(), 0); 232 | 233 | let hash = Sha512_256::new_with_prefix(cdchunk).finalize(); 234 | let (&hash_prefix, hash_suffix) = hash.split_first_chunk::<8>().expect("hash is 32B"); 235 | let hash_suffix: [u8; 24] = hash_suffix.try_into().expect("hash is 32B"); 236 | 237 | let seq = match self.table.entry(u64::from_ne_bytes(hash_prefix)) { 238 | Entry::Vacant(ent) => { 239 | let seq = self.inner.put_reader_inner(&mut { cdchunk })?; 240 | ent.insert(CdcChunk { 241 | sha256_suffix: hash_suffix, 242 | start_section_idx: seq.start_section_idx, 243 | start_offset: seq.start_offset, 244 | }); 245 | seq 246 | } 247 | Entry::Occupied(ent) if ent.get().sha256_suffix == hash_suffix => { 248 | self.deduplicated_bytes += cdchunk.len() as u64; 249 | SeqChunks { 250 | start_section_idx: ent.get().start_section_idx, 251 | start_offset: ent.get().start_offset, 252 | len: cdchunk.len() as u64, 253 | } 254 | } 255 | // Hash prefix collision. 256 | Entry::Occupied(_) => self.inner.put_reader_inner(&mut { cdchunk })?, 257 | }; 258 | 259 | // Merge chunks if possible. 260 | for c in seq.to_chunks(block_size) { 261 | if let Some(p) = chunks 262 | .last_mut() 263 | .filter(|p| (p.section_idx, p.offset + p.size) == (c.section_idx, c.offset)) 264 | { 265 | p.size += c.size; 266 | } else { 267 | chunks.push(c); 268 | } 269 | } 270 | 271 | Ok::<_, Error>(()) 272 | }; 273 | 274 | self.rabin.reset(); 275 | 276 | // | chunk_buf | 277 | // | ...chunk | chunk | partial chunk | next read | ...
| 278 | // ^cut_pos ^end_pos 279 | // ~~~~~~~~~~~ read_len 280 | let mut cut_pos = 0usize; 281 | let mut end_pos = 0usize; 282 | loop { 283 | assert_ne!(end_pos, self.chunk_buf.len()); 284 | let read_len = match rdr.read(&mut self.chunk_buf[end_pos..]) { 285 | Ok(0) => break, 286 | Ok(n) => n, 287 | Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue, 288 | Err(err) => return Err(err.into()), 289 | }; 290 | 291 | for (&b, pos) in self.chunk_buf[end_pos..end_pos + read_len] 292 | .iter() 293 | .zip(end_pos..) 294 | { 295 | self.rabin.slide(b); 296 | // This is the length of the whole chunk, including previous partial data. 297 | // NB. the current byte at `pos` is included, hence the `+1`. 298 | let len = pos - cut_pos + 1; 299 | 300 | // The `MIN_CHUNK_SIZE` guarantees the sliding window is always filled. 301 | if len >= Self::MIN_CHUNK_SIZE && self.rabin.hash & Self::CUT_MASK == Self::CUT_MASK 302 | || len >= Self::MAX_CHUNK_SIZE 303 | { 304 | let cdchunk = &self.chunk_buf[cut_pos..pos]; 305 | cut_pos = pos; 306 | record_chunk(cdchunk)?; 307 | } 308 | } 309 | end_pos += read_len; 310 | 311 | // Shift down the last partial chunk if we reached the end of the buffer. 312 | // For files smaller than `MAX_CHUNK_SIZE`, this path is never entered. 313 | if end_pos >= self.chunk_buf.len() { 314 | debug_assert_eq!(end_pos, self.chunk_buf.len()); 315 | self.chunk_buf.copy_within(cut_pos.., 0); 316 | end_pos -= cut_pos; 317 | cut_pos = 0; 318 | } 319 | } 320 | 321 | if cut_pos < end_pos { 322 | record_chunk(&self.chunk_buf[cut_pos..end_pos])?; 323 | } 324 | 325 | Ok(chunks) 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /dwarfs-enc/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | /// A `Result` with default error [`Error`]. 4 | pub type Result<T, E = Error> = std::result::Result<T, E>; 5 | 6 | /// An error representing any possible error raised from this crate.
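Returning to the cut condition in `CdcChunker::put_reader` above: with a roughly uniform rolling hash, `hash & CUT_MASK == CUT_MASK` fires with probability `1 / (CUT_MASK + 1)`, so the expected chunk size here is about 2 KiB, clamped between `MIN_CHUNK_SIZE` and `MAX_CHUNK_SIZE`. A toy simulation of that cut-point density (a plain LCG stands in for the crate's `Rabin64`; only rough uniformity matters):

```rust
const MASK: u64 = (1 << 11) - 1; // same value as `CUT_MASK` above

fn main() {
    let mut state = 0x9E37_79B9_7F4A_7C15u64;
    let (mut cuts, total) = (0u64, 1_000_000u64);
    for _ in 0..total {
        // Standard LCG constants (Knuth's MMIX), used here only as a noise source.
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        if (state >> 32) & MASK == MASK {
            cuts += 1;
        }
    }
    // Expect roughly total / (MASK + 1) ≈ 488 cut points, i.e. ~2 KiB chunks.
    println!("cuts: {cuts} (expected ≈ {})", total / (MASK + 1));
}
```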
7 | pub struct Error(Box<ErrorInner>); 8 | 9 | #[derive(Debug)] 10 | #[cfg_attr(not(feature = "default"), allow(dead_code))] 11 | pub(crate) enum ErrorInner { 12 | Limit(&'static str), 13 | SerializeMetadata(dwarfs::metadata::Error), 14 | DuplicatedEntry, 15 | Compress(std::io::Error), 16 | 17 | Io(std::io::Error), 18 | } 19 | 20 | impl fmt::Debug for Error { 21 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 22 | self.0.fmt(f) 23 | } 24 | } 25 | 26 | impl fmt::Display for Error { 27 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 28 | match &*self.0 { 29 | ErrorInner::DuplicatedEntry => f.pad("duplicated entry names in a directory"), 30 | ErrorInner::Limit(msg) => write!(f, "{msg}"), 31 | ErrorInner::SerializeMetadata(err) => err.fmt(f), 32 | ErrorInner::Compress(err) | ErrorInner::Io(err) => err.fmt(f), 33 | } 34 | } 35 | } 36 | 37 | impl std::error::Error for Error { 38 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 39 | match &*self.0 { 40 | ErrorInner::Compress(err) | ErrorInner::Io(err) => Some(err), 41 | ErrorInner::SerializeMetadata(err) => Some(err), 42 | _ => None, 43 | } 44 | } 45 | } 46 | 47 | impl From<ErrorInner> for Error { 48 | #[cold] 49 | fn from(err: ErrorInner) -> Self { 50 | Self(Box::new(err)) 51 | } 52 | } 53 | 54 | impl From<std::io::Error> for Error { 55 | #[cold] 56 | fn from(err: std::io::Error) -> Self { 57 | Self(Box::new(ErrorInner::Io(err))) 58 | } 59 | } 60 | 61 | impl From<dwarfs::metadata::Error> for Error { 62 | #[cold] 63 | fn from(err: dwarfs::metadata::Error) -> Self { 64 | Self(Box::new(ErrorInner::SerializeMetadata(err))) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /dwarfs-enc/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A library for writing [DwarFS][dwarfs] archives (a.k.a. images), 2 | //! building on top of the [`dwarfs` crate][::dwarfs]. 3 | //! 4 | //! For reading archives only, check the [`dwarfs` crate][::dwarfs] instead. 5 | //! 6 | //! [dwarfs]: https://github.com/mhx/dwarfs 7 | //! 8 | //! Currently, this crate writes DwarFS archives with filesystem version v2.5, 9 | //! which should be compatible with upstream dwarfs v0.7.0..=v0.12.4 (latest at 10 | //! the time of writing). 11 | //! 12 | //! ## Examples 13 | //! 14 | //! ``` 15 | //! use dwarfs_enc::{ 16 | //! chunker::{Chunker, BasicChunker, CdcChunker}, 17 | //! metadata::{Builder as MetaBuilder, InodeMetadata}, 18 | //! section::{Writer as SectionWriter, CompressParam}, 19 | //! }; 20 | //! use std::{fs, time::SystemTime}; 21 | //! 22 | //! # fn work() -> dwarfs_enc::Result<()> { 23 | //! let f = fs::File::create("out.dwarfs")?; 24 | //! 25 | //! // Create inode metadata. 26 | //! let mut dir_meta = InodeMetadata::new(0o755); 27 | //! dir_meta.uid(1000).gid(1000).atime(SystemTime::now()); 28 | //! // ... or initialize from OS metadata. 29 | //! let file_meta = InodeMetadata::from(&fs::metadata("./bar")?); 30 | //! 31 | //! // Create a hierarchy builder initialized with a root inode. 32 | //! let mut meta = MetaBuilder::new(&dir_meta); 33 | //! 34 | //! // Use ZSTD compression level 22, and Content Defined Chunking (CDC) for deduplication. 35 | //! let compress = CompressParam::Zstd(22); 36 | //! let writer = SectionWriter::new(f)?; 37 | //! let chunker = BasicChunker::new(writer, meta.block_size(), compress); 38 | //! let mut chunker = CdcChunker::new(chunker); 39 | //! 40 | //! // Put a directory and a symlink. 41 | //! let root = meta.root(); 42 | //! let subdir = meta.put_dir(root, "subdir", &dir_meta)?;
43 | //! meta.put_symlink(subdir, "symlink", &file_meta, "./subdir")?; 44 | //! 45 | //! // Put a regular file, using in-memory data. 46 | //! meta.put_file(root, "foo", &file_meta, chunker.put_bytes(b"hello world")?)?; 47 | //! // Put a regular file, reading from an OS File. 48 | //! let chunks = chunker.put_reader(&mut fs::File::open("bar")?)?; 49 | //! let bar = meta.put_file(root, "bar", &file_meta, chunks)?; 50 | //! 51 | //! // Hard links are also supported. 52 | //! meta.put_hard_link(root, "hardlink", bar)?; 53 | //! 54 | //! // Finalize data chunks, metadata, and the section writer, in that order. 55 | //! let mut writer = chunker.finish()?; 56 | //! writer.write_metadata_sections(&meta.finish()?, compress)?; 57 | //! writer.finish()?; 58 | //! 59 | //! # Ok(()) } 60 | //! ``` 61 | //! 62 | //! See also the simple `mkdwarfs` impl at `./examples/mkdwarfs.rs`. 63 | //! 64 | //! ## Cargo features 65 | //! 66 | //! - `zstd`, `lzma` *(Only `zstd` is enabled by default)* 67 | //! 68 | //! Enable relevant compression algorithm support. `zstd` is the default 69 | //! compression algorithm `mkdwarfs` uses, and it should be enough for most cases. 70 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 71 | #![forbid(unsafe_code)] 72 | #![warn(missing_debug_implementations)] 73 | #![warn(missing_docs)] 74 | mod error; 75 | 76 | pub mod chunker; 77 | pub mod metadata; 78 | mod ordered_parallel; 79 | pub mod section; 80 | 81 | use self::error::ErrorInner; 82 | pub use self::error::{Error, Result}; 83 | -------------------------------------------------------------------------------- /dwarfs-enc/src/metadata.rs: -------------------------------------------------------------------------------- 1 | //! DwarFS archive hierarchy builder. 2 | //! 3 | //! This module provides [`Builder`] to build [`dwarfs::metadata::Metadata`] of 4 | //! a DwarFS archive, which is the spine structure for the directory hierarchy and 5 | //! file chunk information. 6 | //! 7 | //! ## Limitations 8 | //! 9 | //! Due to implementation limitations, the `Metadata` structure cannot exceed 10 | //! 2³² bytes. This also implies that the lengths of all substructures, e.g. the number of 11 | //! files, directories, and chunks, must not exceed 2³². 12 | //! 13 | //! Note that this limitation only applies to `Metadata` itself, not file 14 | //! (chunk) data. The total length of chunks is not limited, as long as it 15 | //! is addressable. E.g. it's possible to have 2¹² files each consisting of 2¹⁹ 16 | //! chunks of 2²⁰ bytes without any issue. 17 | use std::{ 18 | borrow::Cow, 19 | hash::{Hash, Hasher}, 20 | num::NonZero, 21 | time::{Duration, SystemTime}, 22 | }; 23 | 24 | use dwarfs::metadata; 25 | use indexmap::IndexSet; 26 | 27 | use crate::{Error, ErrorInner, Result}; 28 | 29 | // These values are stored on disk, thus should be platform-agnostic. 30 | // But `rustix` does not expose them on non-UNIX platforms yet. 31 | // TODO: Maybe define them in `dwarfs`? 32 | // From: 33 | const S_IFSOCK: u32 = 0o0140000; 34 | const S_IFLNK: u32 = 0o0120000; 35 | const S_IFREG: u32 = 0o0100000; 36 | const S_IFBLK: u32 = 0o0060000; 37 | const S_IFDIR: u32 = 0o0040000; 38 | const S_IFCHR: u32 = 0o0020000; 39 | const S_IFIFO: u32 = 0o0010000; 40 | 41 | /// Metadata construction configurations.
42 | #[derive(Debug, Clone)] 43 | pub struct Config { 44 | block_size: NonZero<u32>, 45 | mtime_only: bool, 46 | time_resolution_sec: NonZero<u32>, 47 | source_date_epoch: u64, 48 | creator: Option<Cow<'static, str>>, 49 | created_timestamp: Option<u64>, 50 | } 51 | 52 | impl Default for Config { 53 | fn default() -> Self { 54 | Self { 55 | block_size: NonZero::new(16 << 20).expect("not zero"), 56 | mtime_only: false, 57 | time_resolution_sec: NonZero::new(1).expect("not zero"), 58 | source_date_epoch: u64::MAX, 59 | creator: Some(Cow::Borrowed(Self::DEFAULT_CREATOR_VERSION)), 60 | created_timestamp: None, 61 | } 62 | } 63 | } 64 | 65 | impl Config { 66 | const DEFAULT_CREATOR_VERSION: &str = 67 | concat!(env!("CARGO_PKG_NAME"), " ", env!("CARGO_PKG_VERSION")); 68 | 69 | /// Set the block size of this archive. 70 | /// 71 | /// Default value is 16 MiB. 72 | /// 73 | /// Each [`BLOCK` section][dwarfs::section::SectionType::BLOCK] must have 74 | /// this size (before compression) except for the last one. 75 | /// 76 | /// # Panics 77 | /// 78 | /// Panics if `bytes` is not a power of two. 79 | pub fn block_size(&mut self, bytes: NonZero<u32>) -> &mut Self { 80 | assert!(bytes.is_power_of_two()); 81 | self.block_size = bytes; 82 | self 83 | } 84 | 85 | /// Only store file modification time (mtime) and ignore access (atime) or 86 | /// change (ctime) times. 87 | /// 88 | /// Default value is `false`. 89 | /// 90 | /// This will cause all access and change times to be ignored, and will set 91 | /// a flag in metadata informing their unavailability. 92 | pub fn mtime_only(&mut self, yes: bool) -> &mut Self { 93 | self.mtime_only = yes; 94 | self 95 | } 96 | 97 | /// Set the minimum resolution of all file times. 98 | /// 99 | /// Default value is 1 second, which is also the minimal possible value. 100 | /// 101 | /// A resolution other than one second causes all file times to be truncated to 102 | /// the largest multiple of the resolution not greater than the original value. 103 | pub fn time_resolution_sec(&mut self, sec: NonZero<u32>) -> &mut Self { 104 | self.time_resolution_sec = sec; 105 | self 106 | } 107 | 108 | /// Set the [`SOURCE_DATE_EPOCH`](https://reproducible-builds.org/specs/source-date-epoch/) 109 | /// which clamps all timestamps later than it down to it. 110 | pub fn source_date_epoch(&mut self, timestamp: u64) -> &mut Self { 111 | self.source_date_epoch = timestamp; 112 | self.clamp_timestamp(); 113 | self 114 | } 115 | 116 | /// Set a custom string indicating the name and version of the creator program. 117 | /// 118 | /// Default value is 119 | #[doc = concat!("`\"", env!("CARGO_PKG_NAME"), " ", env!("CARGO_PKG_VERSION"), "\"`.")] 120 | pub fn creator(&mut self, info: impl Into<Option<Cow<'static, str>>>) -> &mut Self { 121 | self.creator = info.into(); 122 | self 123 | } 124 | 125 | /// Set a timestamp indicating the archive creation time. 126 | /// 127 | /// The value will be clamped by [`Config::source_date_epoch`] if both are set. 128 | /// 129 | /// Default value is `None`. 130 | pub fn created_timestamp(&mut self, ts: impl Into<Option<u64>>) -> &mut Self { 131 | self.created_timestamp = ts.into(); 132 | self.clamp_timestamp(); 133 | self 134 | } 135 | 136 | fn clamp_timestamp(&mut self) { 137 | if let Some(t) = &mut self.created_timestamp { 138 | *t = self.source_date_epoch.min(*t); 139 | } 140 | } 141 | } 142 | 143 | /// The metadata builder. 144 | /// 145 | /// See [module-level documentation][self].
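A minimal sketch of wiring a `Config` into a `Builder`, using only the APIs defined in this file:

```rust
use dwarfs_enc::metadata::{Builder, Config, InodeMetadata};
use std::num::NonZero;

fn make_builder() -> Builder {
    let mut cfg = Config::default();
    cfg.block_size(NonZero::new(1 << 20).expect("not zero")) // 1 MiB; must be a power of two
        .mtime_only(true) // drop atime/ctime
        .source_date_epoch(1_700_000_000); // clamp timestamps for reproducible output
    // The root directory inode is created implicitly from this metadata.
    Builder::new_with_config(&cfg, &InodeMetadata::new(0o755))
}
```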
146 | #[derive(Debug)] 147 | pub struct Builder { 148 | config: Config, 149 | 150 | inodes: Vec<InodeData>, 151 | dir_entries: IndexSet<DirEntry>, 152 | chunks: Vec<Chunk>, 153 | file_chunk_start: Vec<u32>, 154 | /// A symlink does not store its target (index) in the inode data; it is looked up 155 | /// through an indirect table keyed by its inode. 156 | symlink_target_idxs: Vec<u32>, 157 | devices: Vec<u64>, 158 | 159 | // TODO: Optimize memory footprint of these small strings. 160 | name_table: IndexSet<String>, 161 | symlink_table: IndexSet<String>, 162 | 163 | modes: IndexSet<u32>, 164 | uids: IndexSet<u32>, 165 | gids: IndexSet<u32>, 166 | } 167 | 168 | impl Builder { 169 | /// Create a builder with default configurations. 170 | pub fn new(root_meta: &InodeMetadata) -> Self { 171 | Self::new_with_config(&Config::default(), root_meta) 172 | } 173 | 174 | /// Create a builder with custom configurations. 175 | pub fn new_with_config(config: &Config, root_meta: &InodeMetadata) -> Self { 176 | let mut this = Self { 177 | config: config.clone(), 178 | inodes: Default::default(), 179 | dir_entries: Default::default(), 180 | chunks: Default::default(), 181 | file_chunk_start: Default::default(), 182 | symlink_target_idxs: Default::default(), 183 | devices: Default::default(), 184 | name_table: Default::default(), 185 | symlink_table: Default::default(), 186 | modes: Default::default(), 187 | uids: Default::default(), 188 | gids: Default::default(), 189 | }; 190 | this.put_inode(S_IFDIR, InodeKind::Dir, root_meta) 191 | .expect("no overflow"); 192 | // NB. The self-link of the root directory is handled in `finish`. 193 | // We do not want to check duplicates against the special (0, 0, 0) link. 194 | this 195 | } 196 | 197 | /// Get the configured block size. 198 | pub fn block_size(&self) -> NonZero<u32> { 199 | self.config.block_size 200 | } 201 | 202 | /// Get the implicitly created root directory. 203 | #[inline] 204 | pub fn root(&self) -> DirId { 205 | DirId(0) 206 | } 207 | 208 | fn put_inode(&mut self, file_type: u32, kind: InodeKind, meta: &InodeMetadata) -> Result<u32> { 209 | let ino = u32::try_from(self.inodes.len()) 210 | .ok() 211 | .ok_or(ErrorInner::Limit("inode count exceeds 2^32"))?; 212 | 213 | let cvt_time = |time: SystemTime| { 214 | let timestamp = time 215 | .duration_since(SystemTime::UNIX_EPOCH) 216 | .map_err(|_| ErrorInner::Limit("timestamp before UNIX epoch is unsupported"))? 217 | .as_secs(); 218 | let multiples = timestamp.min(self.config.source_date_epoch) 219 | / u64::from(self.config.time_resolution_sec.get()); 220 | u32::try_from(multiples) 221 | .map_err(|_| Error::from(ErrorInner::Limit("relative timestamp exceeds 2^32"))) 222 | }; 223 | let mtime_offset = cvt_time(meta.mtime)?; 224 | let (atime_offset, ctime_offset) = if self.config.mtime_only { 225 | (0, 0) 226 | } else { 227 | (cvt_time(meta.atime)?, cvt_time(meta.ctime)?)
228 | }; 229 | 230 | let mode = file_type | meta.mode_without_type; 231 | let mode_idx = self.modes.insert_full(mode).0 as u32; 232 | let uid_idx = self.uids.insert_full(meta.uid).0 as u32; 233 | let gid_idx = self.gids.insert_full(meta.gid).0 as u32; 234 | 235 | self.inodes.push(InodeData { 236 | kind, 237 | orig_ino: ino, 238 | mode_idx, 239 | uid_idx, 240 | gid_idx, 241 | mtime_offset, 242 | atime_offset, 243 | ctime_offset, 244 | }); 245 | 246 | Ok(ino) 247 | } 248 | 249 | fn put_entry_inner(&mut self, parent: DirId, name: &str, child: u32) -> Result<()> { 250 | u32::try_from(self.dir_entries.len()) 251 | .ok() 252 | .ok_or(ErrorInner::Limit("directory entry count exceeds 2^32"))?; 253 | let name_idx = self.name_table.insert_full(name.into()).0 as u32; 254 | let (_, inserted) = self.dir_entries.insert_full(DirEntry { 255 | parent: parent.0, 256 | name_idx, 257 | child, 258 | }); 259 | if !inserted { 260 | return Err(ErrorInner::DuplicatedEntry.into()); 261 | } 262 | Ok(()) 263 | } 264 | 265 | /// Add an empty directory under a directory. 266 | /// 267 | /// # Errors 268 | /// 269 | /// Returns `Err` if any of the following holds: 270 | /// 271 | /// - Inode count overflows. 272 | /// - Directory entry count overflows. 273 | /// - There is already an entry with the same name in the directory. 274 | #[inline] 275 | pub fn put_dir(&mut self, parent: DirId, name: &str, meta: &InodeMetadata) -> Result<DirId> { 276 | let ino = self.put_inode(S_IFDIR, InodeKind::Dir, meta)?; 277 | self.put_entry_inner(parent, name, ino)?; 278 | Ok(DirId(ino)) 279 | } 280 | 281 | /// Add a hard link to an existing inode under a directory. 282 | /// 283 | /// # Errors 284 | /// 285 | /// See [`Builder::put_dir`]. 286 | pub fn put_hard_link( 287 | &mut self, 288 | parent: DirId, 289 | name: &str, 290 | inode: impl Into<LinkableInodeId>, 291 | ) -> Result<()> { 292 | self.put_entry_inner(parent, name, inode.into().0) 293 | } 294 | 295 | /// Add a regular file under a directory. 296 | /// 297 | /// # Panics 298 | /// 299 | /// Panics if any chunk has an end offset (`offset + size`) exceeding [`Config::block_size`]. 300 | /// 301 | /// # Errors 302 | /// 303 | /// See [`Builder::put_dir`]. 304 | pub fn put_file( 305 | &mut self, 306 | parent: DirId, 307 | name: &str, 308 | meta: &InodeMetadata, 309 | chunks: impl IntoIterator<Item = Chunk>, 310 | ) -> Result<FileId> { 311 | let chunk_start = self.chunks.len() as u32; 312 | self.chunks.extend(chunks); 313 | u32::try_from(self.chunks.len()) 314 | .ok() 315 | .ok_or(ErrorInner::Limit("file chunk count exceeds 2^32"))?; 316 | if let Some(c) = self.chunks[chunk_start as usize..].iter().find(|c| { 317 | c.offset 318 | .checked_add(c.size) 319 | .is_none_or(|end| end > self.config.block_size.get()) 320 | }) { 321 | panic!( 322 | "invalid chunk for block size {}B: {:?}", 323 | self.config.block_size, c, 324 | ); 325 | } 326 | let ino = self.put_inode(S_IFREG, InodeKind::UniqueFile, meta)?; 327 | self.file_chunk_start.push(chunk_start); 328 | self.put_entry_inner(parent, name, ino)?; 329 | Ok(FileId(ino)) 330 | } 331 | 332 | /// Add a symbolic link (symlink) under a directory. 333 | /// 334 | /// # Errors 335 | /// 336 | /// See [`Builder::put_dir`].
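The duplicate-name check in `put_entry_inner` above is observable through any of the `put_*` methods; a small sketch:

```rust
use dwarfs_enc::metadata::{Builder, InodeMetadata};

fn main() {
    let meta = InodeMetadata::new(0o755);
    let mut b = Builder::new(&meta);
    let root = b.root();
    b.put_dir(root, "x", &meta).unwrap();
    // Same name under the same parent: rejected with a `DuplicatedEntry` error.
    assert!(b.put_dir(root, "x", &meta).is_err());
}
```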
337 | #[inline] 338 | pub fn put_symlink( 339 | &mut self, 340 | parent: DirId, 341 | name: &str, 342 | meta: &InodeMetadata, 343 | target: &str, 344 | ) -> Result<LinkableInodeId> { 345 | let ino = self.put_inode(S_IFLNK, InodeKind::Symlink, meta)?; 346 | let tgt_idx = self.symlink_table.insert_full(target.into()).0 as u32; 347 | self.symlink_target_idxs.push(tgt_idx); 348 | self.put_entry_inner(parent, name, ino)?; 349 | Ok(LinkableInodeId(ino)) 350 | } 351 | 352 | /// Add a block device inode under a directory. 353 | /// 354 | /// # Errors 355 | /// 356 | /// See [`Builder::put_dir`]. 357 | #[inline] 358 | pub fn put_block_device( 359 | &mut self, 360 | parent: DirId, 361 | name: &str, 362 | meta: &InodeMetadata, 363 | device_id: u64, 364 | ) -> Result<LinkableInodeId> { 365 | let ino = self.put_inode(S_IFBLK, InodeKind::Device, meta)?; 366 | self.devices.push(device_id); 367 | self.put_entry_inner(parent, name, ino)?; 368 | Ok(LinkableInodeId(ino)) 369 | } 370 | 371 | /// Add a character device inode under a directory. 372 | /// 373 | /// # Errors 374 | /// 375 | /// See [`Builder::put_dir`]. 376 | #[inline] 377 | pub fn put_char_device( 378 | &mut self, 379 | parent: DirId, 380 | name: &str, 381 | meta: &InodeMetadata, 382 | device_id: u64, 383 | ) -> Result<LinkableInodeId> { 384 | let ino = self.put_inode(S_IFCHR, InodeKind::Device, meta)?; 385 | self.devices.push(device_id); 386 | self.put_entry_inner(parent, name, ino)?; 387 | Ok(LinkableInodeId(ino)) 388 | } 389 | 390 | /// Add a FIFO (named pipe) inode under a directory. 391 | /// 392 | /// # Errors 393 | /// 394 | /// See [`Builder::put_dir`]. 395 | #[inline] 396 | pub fn put_fifo( 397 | &mut self, 398 | parent: DirId, 399 | name: &str, 400 | meta: &InodeMetadata, 401 | ) -> Result<LinkableInodeId> { 402 | let ino = self.put_inode(S_IFIFO, InodeKind::Ipc, meta)?; 403 | self.put_entry_inner(parent, name, ino)?; 404 | Ok(LinkableInodeId(ino)) 405 | } 406 | 407 | /// Add a socket inode under a directory. 408 | /// 409 | /// # Errors 410 | /// 411 | /// See [`Builder::put_dir`]. 412 | #[inline] 413 | pub fn put_socket( 414 | &mut self, 415 | parent: DirId, 416 | name: &str, 417 | meta: &InodeMetadata, 418 | ) -> Result<LinkableInodeId> { 419 | let ino = self.put_inode(S_IFSOCK, InodeKind::Ipc, meta)?; 420 | self.put_entry_inner(parent, name, ino)?; 421 | Ok(LinkableInodeId(ino)) 422 | } 423 | 424 | // TODO: FSST compressor. 425 | fn build_string_table( 426 | tbl: IndexSet<String>, 427 | err_msg: &'static str, 428 | ) -> Result<Option<metadata::StringTable>> { 429 | if tbl.is_empty() { 430 | return Ok(None); 431 | } 432 | 433 | let mut out = metadata::StringTable::default(); 434 | // Delta indices. 435 | out.packed_index = true; 436 | 437 | let total_len = tbl.iter().map(|s| s.len()).sum::<usize>(); 438 | u32::try_from(total_len) 439 | .ok() 440 | .ok_or(ErrorInner::Limit(err_msg))?; 441 | out.buffer.reserve(total_len); 442 | 443 | // NB. For `packed_index`, the first zero should be omitted. 444 | out.index.reserve(tbl.len()); 445 | for s in tbl { 446 | out.buffer.extend_from_slice(s.as_bytes()); 447 | out.index.push(s.len() as u32); 448 | } 449 | 450 | Ok(Some(out)) 451 | } 452 | 453 | /// Finalize and construct the result [`Metadata`][dwarfs::metadata::Metadata]. 454 | /// 455 | /// # Errors 456 | /// 457 | /// Returns `Err` if the hierarchy is invalid, or exceeds certain limitations, 458 | /// including but not limited to: 459 | /// - Duplicated entry names in a directory. 460 | /// - Any (intermediate) low-level structure exceeds 2³² bytes. 461 | /// See [module-level documentation][self].
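For intuition about the packed layout `build_string_table` above produces: with `packed_index`, each index entry stores a string's length (the leading zero of the cumulative form is omitted), and a reader recovers offsets by a prefix sum. A plain-std illustration of the reading side, not crate API:

```rust
fn main() {
    let strings = ["foo", "ab", "quux"];
    let buffer: String = strings.concat(); // "fooabquux"
    let packed: Vec<u32> = strings.iter().map(|s| s.len() as u32).collect(); // [3, 2, 4]

    // Recover the i-th string: its start is the sum of all lengths before it.
    let mut start = 0usize;
    for (len, s) in packed.iter().zip(strings) {
        let end = start + *len as usize;
        assert_eq!(&buffer[start..end], s);
        start = end;
    }
}
```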
462 | pub fn finish(mut self) -> Result<metadata::Metadata> { 463 | let mut out = metadata::Metadata::default(); 464 | let opts = out.options.insert(metadata::FsOptions::default()); 465 | 466 | //// Configurables //// 467 | 468 | opts.mtime_only = self.config.mtime_only; 469 | opts.time_resolution_sec = (self.config.time_resolution_sec.get() != 1) 470 | .then_some(self.config.time_resolution_sec.get()); 471 | // TODO: Pack more fields if possible. 472 | 473 | //// Inodes //// 474 | 475 | // Stable sort to keep relative order unchanged. It is important to keep 476 | // the topological order of directories. 477 | self.inodes.sort_by_key(|inode| inode.kind as u8); 478 | let orig_ino_to_final = { 479 | let mut map = vec![0u32; self.inodes.len()]; 480 | for (final_ino, inode) in self.inodes.iter().enumerate() { 481 | map[inode.orig_ino as usize] = final_ino as u32; 482 | } 483 | map 484 | }; 485 | 486 | out.inodes = self 487 | .inodes 488 | .iter() 489 | .map(|inode| { 490 | let mut data = metadata::InodeData::default(); 491 | data.mode_index = inode.mode_idx; 492 | data.owner_index = inode.uid_idx; 493 | data.group_index = inode.gid_idx; 494 | data.atime_offset = inode.atime_offset; 495 | data.mtime_offset = inode.mtime_offset; 496 | data.ctime_offset = inode.ctime_offset; 497 | data 498 | }) 499 | .collect(); 500 | 501 | //// Directory and entries //// 502 | 503 | let dir_cnt = self 504 | .inodes 505 | .iter() 506 | .take_while(|data| data.kind == InodeKind::Dir) 507 | .count(); 508 | assert_ne!(dir_cnt, 0, "root exists"); 509 | 510 | // Directory relative order is kept unchanged because of the stable sort above. 511 | // So this will sort `dir_entries` to the final order. 512 | // Note that `dir_entries[0]` is the self-link for the root directory. 513 | let mut dir_entries = std::iter::once(DirEntry { 514 | parent: 0, 515 | child: 0, 516 | // This index is unused. 517 | name_idx: 0, 518 | }) 519 | .chain(self.dir_entries) 520 | .collect::<Vec<_>>(); 521 | dir_entries[1..] 522 | .sort_by_key(|ent| (ent.parent, &self.name_table[ent.name_idx as usize][..])); 523 | // Checked on inserting entries. 524 | debug_assert!( 525 | dir_entries[1..] 526 | .windows(2) 527 | .all(|w| (w[0].parent, w[0].name_idx) != (w[1].parent, w[1].name_idx)) 528 | ); 529 | 530 | // Initialize directory links. 531 | { 532 | // One more sentinel element. 533 | out.directories = vec![Default::default(); dir_cnt + 1]; 534 | 535 | // Skip the 0-th root directory, which should be kept zero-initialized. 536 | let mut offset = 1u32; 537 | for (final_ino, inode) in self.inodes[..dir_cnt].iter().enumerate() { 538 | let dir = &mut out.directories[final_ino]; 539 | dir.first_entry = offset; 540 | // For child directories of root, this is the default 0, as expected. 541 | // For other directories, this should already be initialized by 542 | // the entry traversal of its parent entries, because of 543 | // the topological order enforced by the APIs. 544 | let parent_entry = dir.self_entry; 545 | 546 | // Update parent links of child directories. 547 | while let Some(ent) = dir_entries 548 | .get(offset as usize) 549 | .filter(|ent| ent.parent == inode.orig_ino) 550 | { 551 | let child_final_ino = orig_ino_to_final[ent.child as usize] as usize; 552 | if let Some(subdir) = out.directories.get_mut(child_final_ino) { 553 | subdir.self_entry = offset; 554 | subdir.parent_entry = parent_entry; 555 | } 556 | offset += 1; 557 | } 558 | } 559 | debug_assert_eq!(offset as usize, dir_entries.len()); 560 | 561 | // Sentinel.
562 | out.directories.last_mut().unwrap().first_entry = dir_entries.len() as u32; 563 | } 564 | 565 | out.dir_entries = Some( 566 | dir_entries 567 | .into_iter() 568 | .map(|ent| { 569 | let mut out = metadata::DirEntry::default(); 570 | out.name_index = ent.name_idx; 571 | out.inode_num = orig_ino_to_final[ent.child as usize]; 572 | out 573 | }) 574 | .collect(), 575 | ); 576 | 577 | //// String tables //// 578 | 579 | out.compact_names = 580 | Self::build_string_table(self.name_table, "total file name length exceeds 2^32")?; 581 | out.compact_symlinks = 582 | Self::build_string_table(self.symlink_table, "total symlink length exceeds 2^32")?; 583 | 584 | //// Trivial fields //// 585 | 586 | out.block_size = self.config.block_size.get(); 587 | out.total_fs_size = 0; // Not strictly necessary; only for human readers. 588 | out.dwarfs_version = self.config.creator.map(|s| String::from(s).into()); 589 | out.create_timestamp = self.config.created_timestamp; 590 | 591 | out.symlink_table = self.symlink_target_idxs; 592 | out.modes = self.modes.into_iter().collect(); 593 | out.uids = self.uids.into_iter().collect(); 594 | out.gids = self.gids.into_iter().collect(); 595 | out.devices = (!self.devices.is_empty()).then_some(self.devices); 596 | 597 | out.chunk_table = self.file_chunk_start; 598 | // Sentinel. 599 | out.chunk_table.push(self.chunks.len() as u32); 600 | 601 | out.chunks = self 602 | .chunks 603 | .into_iter() 604 | .map(|chunk| { 605 | let mut data = metadata::Chunk::default(); 606 | data.block = chunk.section_idx; 607 | data.offset = chunk.offset; 608 | data.size = chunk.size; 609 | data 610 | }) 611 | .collect(); 612 | 613 | Ok(out) 614 | } 615 | } 616 | 617 | #[derive(Debug)] 618 | struct DirEntry { 619 | parent: u32, 620 | name_idx: u32, 621 | child: u32, 622 | } 623 | 624 | // Hash and Eq impls are only on the `(parent, name_idx)` pair, because we want to 625 | // check that entry names in a directory do not duplicate. 626 | impl Hash for DirEntry { 627 | fn hash<H: Hasher>(&self, h: &mut H) { 628 | h.write_u64(u64::from(self.parent) | u64::from(self.name_idx) << 32); 629 | } 630 | } 631 | impl PartialEq for DirEntry { 632 | fn eq(&self, other: &Self) -> bool { 633 | (self.parent, self.name_idx) == (other.parent, other.name_idx) 634 | } 635 | } 636 | impl Eq for DirEntry {} 637 | 638 | /// The location of a chunk of data for a regular file. 639 | /// 640 | /// Usually, you should use [`crate::chunker::Chunker`]s to slice file data into 641 | /// [`Chunk`]s and copy data at the same time, rather than manually constructing 642 | /// them. 643 | /// 644 | /// For details about data chunking and the meaning of fields, check the 645 | /// [upstream documentation](https://github.com/mhx/dwarfs/blob/v0.12.4/doc/dwarfs-format.md). 646 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 647 | pub struct Chunk { 648 | /// The section index. 649 | pub section_idx: u32, 650 | /// The byte offset inside the section. 651 | pub offset: u32, 652 | /// The size of the chunk. 653 | pub size: u32, 654 | } 655 | 656 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 657 | enum InodeKind { 658 | // NB. The order matters for sorting. 659 | // It should match the DwarFS inode type order. 660 | Dir, 661 | Symlink, 662 | UniqueFile, 663 | // TODO: SharedFile 664 | Device, 665 | Ipc, 666 | } 667 | 668 | #[derive(Debug)] 669 | struct InodeData { 670 | kind: InodeKind, 671 | // To maintain the mapping after sorting inodes by their kinds.
672 | orig_ino: u32, 673 | 674 | mode_idx: u32, 675 | uid_idx: u32, 676 | gid_idx: u32, 677 | mtime_offset: u32, 678 | atime_offset: u32, 679 | ctime_offset: u32, 680 | } 681 | 682 | /// The metadata of an inode. 683 | #[derive(Debug, Clone)] 684 | pub struct InodeMetadata { 685 | mode_without_type: u32, 686 | uid: u32, 687 | gid: u32, 688 | mtime: SystemTime, 689 | atime: SystemTime, 690 | ctime: SystemTime, 691 | } 692 | 693 | impl From<&std::fs::Metadata> for InodeMetadata { 694 | fn from(meta: &std::fs::Metadata) -> Self { 695 | #[cfg(unix)] 696 | use std::os::unix::fs::MetadataExt; 697 | 698 | #[cfg(unix)] 699 | let mode = meta.mode() & 0o777; 700 | #[cfg(not(unix))] 701 | let mode = if meta.is_dir() { 0o755 } else { 0o644 }; 702 | 703 | let mut ret = InodeMetadata::new(mode); 704 | if let Ok(mtime) = meta.modified() { 705 | ret.mtime(mtime); 706 | } 707 | if let Ok(atime) = meta.accessed() { 708 | ret.atime(atime); 709 | } 710 | 711 | #[cfg(unix)] 712 | { 713 | let ctime = meta.ctime(); 714 | let ctime = if ctime >= 0 { 715 | SystemTime::UNIX_EPOCH + Duration::from_secs(ctime as u64) 716 | } else { 717 | SystemTime::UNIX_EPOCH - Duration::from_secs(-ctime as u64) 718 | }; 719 | ret.ctime(ctime).uid(meta.uid()).gid(meta.gid()); 720 | } 721 | 722 | ret 723 | } 724 | } 725 | 726 | impl InodeMetadata { 727 | /// Create a default metadata with the given [file mode][mode]. 728 | /// 729 | /// [mode]: https://man.archlinux.org/man/inode.7.en#The_file_type_and_mode 730 | pub const fn new(mode_without_type: u32) -> Self { 731 | assert!( 732 | mode_without_type & !0o777 == 0, 733 | "`mode_without_type` should only have 0o777 bits set", 734 | ); 735 | Self { 736 | mode_without_type, 737 | uid: 0, 738 | gid: 0, 739 | mtime: SystemTime::UNIX_EPOCH, 740 | atime: SystemTime::UNIX_EPOCH, 741 | ctime: SystemTime::UNIX_EPOCH, 742 | } 743 | } 744 | 745 | /// Set the owner numeric id. 746 | /// 747 | /// If unset, it defaults to `0` (root). 748 | pub fn uid(&mut self, uid: u32) -> &mut Self { 749 | self.uid = uid; 750 | self 751 | } 752 | 753 | /// Set the owner group numeric id. 754 | /// 755 | /// If unset, it defaults to `0` (root). 756 | pub fn gid(&mut self, gid: u32) -> &mut Self { 757 | self.gid = gid; 758 | self 759 | } 760 | 761 | /// Set the modification time (mtime). 762 | /// 763 | /// If unset, it defaults to [`SystemTime::UNIX_EPOCH`]. 764 | pub fn mtime(&mut self, timestamp: SystemTime) -> &mut Self { 765 | self.mtime = timestamp; 766 | self 767 | } 768 | 769 | /// Set the access time (atime). 770 | /// 771 | /// If unset, it defaults to [`SystemTime::UNIX_EPOCH`]. 772 | /// If [`Config::mtime_only`] is set, this value is ignored. 773 | pub fn atime(&mut self, timestamp: SystemTime) -> &mut Self { 774 | self.atime = timestamp; 775 | self 776 | } 777 | 778 | /// Set the change time (ctime). 779 | /// 780 | /// If unset, it defaults to [`SystemTime::UNIX_EPOCH`]. 781 | /// If [`Config::mtime_only`] is set, this value is ignored. 782 | pub fn ctime(&mut self, timestamp: SystemTime) -> &mut Self { 783 | self.ctime = timestamp; 784 | self 785 | } 786 | } 787 | 788 | /// A handle to a directory inode. 789 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 790 | pub struct DirId(u32); 791 | 792 | /// A handle to an inode that is allowed to be hard-linked. 793 | /// 794 | /// All inodes except directories are linkable. 795 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 796 | pub struct LinkableInodeId(u32); 797 | 798 | /// A handle to a regular file inode. 799 |
799 | /// 800 | /// This type implements `Into`. 801 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 802 | pub struct FileId(u32); 803 | 804 | impl From for LinkableInodeId { 805 | fn from(i: FileId) -> Self { 806 | Self(i.0) 807 | } 808 | } 809 | -------------------------------------------------------------------------------- /dwarfs-enc/src/ordered_parallel.rs: -------------------------------------------------------------------------------- 1 | //! Run tasks in parallel while keeping the original order. 2 | 3 | use std::{fmt, num::NonZero, panic, thread}; 4 | 5 | use crossbeam_channel as mpmc; 6 | 7 | pub struct OrderedParallel { 8 | injector: Option>>, 9 | collector: mpmc::Receiver>, 10 | next_to_send: usize, 11 | next_to_recv: usize, 12 | ring_buf: Box<[Option]>, 13 | 14 | threads: Box<[thread::JoinHandle<()>]>, 15 | } 16 | 17 | impl fmt::Debug for OrderedParallel { 18 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 19 | f.debug_struct("OrderedParallel") 20 | .field("next_to_send", &self.next_to_send) 21 | .field("next_to_recv", &self.next_to_recv) 22 | .field("ring_buf_size", &self.ring_buf.len()) 23 | .field("threads_cnt", &self.threads.len()) 24 | .finish_non_exhaustive() 25 | } 26 | } 27 | 28 | type Task = (usize, Box R + Send>); 29 | type TaskResult = (usize, thread::Result); 30 | 31 | impl Drop for OrderedParallel { 32 | fn drop(&mut self) { 33 | self.injector = None; 34 | let worker_panicked = std::mem::take(&mut self.threads) 35 | .into_iter() 36 | .fold(false, |panicked, j| panicked | j.join().is_err()); 37 | if worker_panicked && !thread::panicking() { 38 | panic!("worker panicked"); 39 | } 40 | } 41 | } 42 | 43 | impl OrderedParallel { 44 | pub fn new(thread_name: &str, thread_cnt: NonZero) -> std::io::Result { 45 | // Random picked: 1.5x. 46 | let max_inflights = thread_cnt.saturating_add(thread_cnt.get().div_ceil(2)); 47 | 48 | let (injector, injector_rx) = mpmc::bounded(max_inflights.get()); 49 | let (collector_tx, collector) = mpmc::bounded(max_inflights.get()); 50 | 51 | let threads = (0..thread_cnt.get()) 52 | .map(|idx| { 53 | let injector_rx = injector_rx.clone(); 54 | let collector_tx = collector_tx.clone(); 55 | std::thread::Builder::new() 56 | .name(format!("{thread_name}-{idx}")) 57 | .spawn(|| Self::worker(injector_rx, collector_tx)) 58 | }) 59 | .collect::>>()?; 60 | 61 | let ring_buf = std::iter::repeat_with(|| None) 62 | .take(max_inflights.get()) 63 | .collect(); 64 | 65 | Ok(Self { 66 | next_to_send: 0, 67 | next_to_recv: 0, 68 | 69 | ring_buf, 70 | 71 | injector: Some(injector), 72 | threads, 73 | collector, 74 | }) 75 | } 76 | 77 | fn worker(injector: mpmc::Receiver>, collector: mpmc::Sender>) { 78 | while let Ok((index, task)) = injector.recv() { 79 | let ret = panic::catch_unwind(panic::AssertUnwindSafe(task)); 80 | if collector.send((index, ret)).is_err() { 81 | break; 82 | } 83 | } 84 | } 85 | 86 | /// Spawn a new task and retrieve some completed tasks. 87 | /// 88 | /// You should always drain the returning iterator, or the behavior is unspecified. 
89 | #[must_use = "iterator must be drained"] 90 | pub fn submit_and_get<F>(&mut self, task: F) -> impl Iterator<Item = R> 91 | where 92 | F: FnOnce() -> R + Send + 'static, 93 | { 94 | let index = self.next_to_send; 95 | self.next_to_send += 1; 96 | if self.next_to_send == self.ring_buf.len() { 97 | self.next_to_send = 0; 98 | } 99 | self.send_and_recv_inner((index, Box::new(task))) 100 | } 101 | 102 | fn send_and_recv_inner(&mut self, task: Task<R>) -> impl Iterator<Item = R> { 103 | let injector = self.injector.as_ref().expect("channel closed"); 104 | // Blocking wait for the bottlenecked task if the next send would overflow. 105 | // Note that we ensure `ring_buf.len() >= 2` so the first send never waits. 106 | if self.next_to_send == self.next_to_recv { 107 | while self.ring_buf[self.next_to_recv].is_none() { 108 | Self::process_ret( 109 | self.collector.recv().expect("channel closed"), 110 | &mut self.ring_buf, 111 | ); 112 | } 113 | } 114 | 115 | injector.try_send(task).expect("channel is not full"); 116 | while let Ok(ret) = self.collector.try_recv() { 117 | Self::process_ret(ret, &mut self.ring_buf); 118 | } 119 | 120 | Self::received_iter(&mut self.next_to_recv, &mut self.ring_buf) 121 | } 122 | 123 | fn process_ret((idx, ret): TaskResult<R>, ring_buf: &mut [Option<R>]) { 124 | let v = match ret { 125 | Ok(v) => v, 126 | Err(_err) => panic!("task panicked"), 127 | }; 128 | assert!(ring_buf[idx].is_none(), "completion buffer overflowed"); 129 | ring_buf[idx] = Some(v); 130 | } 131 | 132 | fn received_iter( 133 | next_to_recv: &mut usize, 134 | ring_buf: &mut [Option<R>], 135 | ) -> impl Iterator<Item = R> { 136 | std::iter::from_fn(|| { 137 | let elem = ring_buf[*next_to_recv].take()?; 138 | *next_to_recv += 1; 139 | if *next_to_recv == ring_buf.len() { 140 | *next_to_recv = 0; 141 | } 142 | Some(elem) 143 | }) 144 | } 145 | 146 | /// Block and receive some completed results. 147 | /// 148 | /// Returns `None` if the channel is closed and all results are drained. 149 | pub fn wait_and_get(&mut self) -> Option<impl Iterator<Item = R>> { 150 | while self.ring_buf[self.next_to_recv].is_none() { 151 | let ret = self.collector.recv().ok()?; 152 | Self::process_ret(ret, &mut self.ring_buf); 153 | } 154 | Some(Self::received_iter( 155 | &mut self.next_to_recv, 156 | &mut self.ring_buf, 157 | )) 158 | } 159 | 160 | /// Signal the end of tasks. Stop all workers. 161 | pub fn stop(&mut self) { 162 | self.injector = None; 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /dwarfs-enc/src/section.rs: -------------------------------------------------------------------------------- 1 | //! DwarFS section writer. 2 | use std::io::Write; 3 | use std::num::NonZero; 4 | 5 | use dwarfs::section::{CompressAlgo, Header, MagicVersion, SectionIndexEntry, SectionType}; 6 | use dwarfs::zerocopy::IntoBytes; 7 | use zerocopy::FromBytes; 8 | 9 | use crate::ordered_parallel::OrderedParallel; 10 | use crate::{ErrorInner, Result}; 11 | 12 | /// The section compression parameter. 13 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 14 | #[non_exhaustive] 15 | pub enum CompressParam { 16 | /// No compression. 17 | None, 18 | /// Compress with a given ZSTD level. Requires feature `zstd`. 19 | #[cfg(feature = "zstd")] 20 | Zstd(zstd_safe::CompressionLevel), 21 | /// Compress with a given LZMA (a.k.a. xz) level. Requires feature `lzma`. 22 | #[cfg(feature = "lzma")] 23 | Lzma(u32), 24 | } 25 | 26 | /// DwarFS section writer.
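For intuition about the 2⁴⁸ limit enforced by `IndexBuilder` below: a section index entry packs a type tag together with a 48-bit archive offset into a single `u64`. The exact bit layout belongs to the `dwarfs` crate; this sketch only illustrates the arithmetic of such a split:

```rust
fn main() {
    let (typ, offset) = (7u64, 123_456u64);
    assert!(offset < 1u64 << 48); // offsets must fit in 48 bits
    let packed = typ << 48 | offset; // tag in the high 16 bits
    assert_eq!(packed >> 48, typ);
    assert_eq!(packed & ((1u64 << 48) - 1), offset);
}
```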
27 | #[derive(Debug)] 28 | pub struct Writer<W: ?Sized> { 29 | workers: OrderedParallel<Result<Vec<u8>>>, 30 | /// The total number of sections initiated, including ones that are not written yet. 31 | initiated_section_count: u32, 32 | index: IndexBuilder, 33 | 34 | w: W, 35 | } 36 | 37 | #[derive(Debug, Default)] 38 | struct IndexBuilder { 39 | index: Vec<SectionIndexEntry>, 40 | next_offset: u64, 41 | } 42 | 43 | impl IndexBuilder { 44 | fn push(&mut self, typ: SectionType, sec_raw_len: usize) -> Result<()> { 45 | let ent = SectionIndexEntry::new(typ, self.next_offset).expect("checked by last write"); 46 | self.next_offset = u64::try_from(sec_raw_len) 47 | .ok() 48 | .and_then(|l| l.checked_add(self.next_offset)) 49 | .filter(|&n| n < 1u64 << 48) 50 | .ok_or(ErrorInner::Limit("archive size exceeds 2^48 bytes"))?; 51 | self.index.push(ent); 52 | Ok(()) 53 | } 54 | } 55 | 56 | impl<W> Writer<W> { 57 | /// Create a default multi-threaded section writer. 58 | pub fn new(w: W) -> std::io::Result<Self> { 59 | let thread_cnt = std::thread::available_parallelism()?; 60 | Self::new_with_threads(w, thread_cnt) 61 | } 62 | 63 | /// Create a section writer with specific parallelism. 64 | pub fn new_with_threads(w: W, thread_cnt: NonZero<usize>) -> std::io::Result<Self> { 65 | let workers = OrderedParallel::new("compressor", thread_cnt)?; 66 | Ok(Self { 67 | workers, 68 | initiated_section_count: 0, 69 | index: IndexBuilder::default(), 70 | w, 71 | }) 72 | } 73 | } 74 | 75 | impl<W: ?Sized> Writer<W> { 76 | /// Get a reference to the underlying writer. 77 | pub fn get_ref(&self) -> &W { 78 | &self.w 79 | } 80 | 81 | /// Get a mutable reference to the underlying writer. 82 | pub fn get_mut(&mut self) -> &mut W { 83 | &mut self.w 84 | } 85 | 86 | /// Retrieve the ownership of the underlying writer. 87 | pub fn into_inner(self) -> W 88 | where 89 | W: Sized, 90 | { 91 | self.w 92 | } 93 | } 94 | 95 | impl<W: Write + 'static> Writer<W> { 96 | /// Number of sections initiated via `write_section`. 97 | #[must_use] 98 | pub fn section_count(&self) -> u32 { 99 | // Checked by `write_section` not to overflow u32. 100 | self.initiated_section_count 101 | } 102 | 103 | /// Finalize and seal the DwarFS archive. 104 | pub fn finish(mut self) -> Result<W> { 105 | // Wait for all preceding sections to complete, so their offsets are recorded. 106 | self.workers.stop(); 107 | while let Some(iter) = self.workers.wait_and_get() { 108 | Self::commit_completed(iter, &mut self.w, &mut self.index)?; 109 | } 110 | 111 | // The last length is unused. 112 | let index_byte_len = self.index.index.as_bytes().len() + size_of::<SectionIndexEntry>(); 113 | self.index 114 | .push(SectionType::SECTION_INDEX, index_byte_len)?; 115 | let sec = Self::seal_section( 116 | self.section_count(), 117 | SectionType::SECTION_INDEX, 118 | CompressParam::None, 119 | self.index.index.as_bytes(), 120 | )?; 121 | self.w.write_all(&sec)?; 122 | 123 | Ok(self.w) 124 | } 125 | 126 | fn commit_completed( 127 | completed: impl Iterator<Item = Result<Vec<u8>>>, 128 | w: &mut W, 129 | index: &mut IndexBuilder, 130 | ) -> Result<()> { 131 | for ret in completed { 132 | let sec = ret?; 133 | let off = std::mem::offset_of!(Header, section_type); 134 | let typ = SectionType::read_from_prefix(&sec[off..]).unwrap().0; 135 | w.write_all(&sec)?; 136 | index.push(typ, sec.len())?; 137 | } 138 | Ok(()) 139 | } 140 | 141 | /// Write a section with the given (uncompressed) payload. 142 | pub fn write_section( 143 | &mut self, 144 | section_type: SectionType, 145 | compression: CompressParam, 146 | payload: &[u8], 147 | ) -> Result<()> { 148 | // Should not happen for current machines.
149 | assert!(u64::try_from(size_of::<Header>
() + payload.len()).is_ok()); 150 | 151 | let section_number = self.section_count(); 152 | self.initiated_section_count = self 153 | .initiated_section_count 154 | .checked_add(1) 155 | .ok_or(ErrorInner::Limit("section count exceeds 2^32"))?; 156 | 157 | let payload = payload.to_vec(); 158 | Self::commit_completed( 159 | self.workers.submit_and_get(move || { 160 | Self::seal_section(section_number, section_type, compression, &payload) 161 | }), 162 | &mut self.w, 163 | &mut self.index, 164 | ) 165 | } 166 | 167 | /// Compress payload if possible, calculate hashes and fill the section header. 168 | fn seal_section( 169 | section_number: u32, 170 | section_type: SectionType, 171 | compression: CompressParam, 172 | payload: &[u8], 173 | ) -> Result<Vec<u8>> { 174 | let mut buf = vec![0u8; size_of::<Header>
() + payload.len()]; 175 | #[cfg_attr(not(feature = "default"), allow(unused_labels))] 176 | let (compress_algo, compressed_len) = 'compressed: { 177 | let compressed_buf = &mut buf[size_of::<Header>
()..]; 178 | match compression { 179 | CompressParam::None => {} 180 | 181 | #[cfg(feature = "zstd")] 182 | #[expect(non_upper_case_globals, reason = "name from C")] 183 | CompressParam::Zstd(lvl) => { 184 | // See: 185 | const ZSTD_error_dstSize_tooSmall: zstd_safe::ErrorCode = -70isize as usize; 186 | 187 | match zstd_safe::compress(compressed_buf, payload, lvl) { 188 | Ok(compressed_len) => { 189 | assert!(compressed_len <= payload.len()); 190 | break 'compressed (CompressAlgo::ZSTD, compressed_len); 191 | } 192 | Err(ZSTD_error_dstSize_tooSmall) => {} 193 | Err(code) => { 194 | let err = std::io::Error::new( 195 | std::io::ErrorKind::InvalidInput, 196 | format!( 197 | "ZSTD compression failed (code={}): {}", 198 | code, 199 | zstd_safe::get_error_name(code), 200 | ), 201 | ); 202 | return Err(ErrorInner::Compress(err).into()); 203 | } 204 | } 205 | } 206 | 207 | #[cfg(feature = "lzma")] 208 | CompressParam::Lzma(lvl) => { 209 | if let Some(compressed_len) = (|| { 210 | use liblzma::stream::{Action, Check, Status, Stream}; 211 | 212 | // The default parameters used by `liblzma::bufread::XzEncoder::new`. 213 | // See: 214 | let mut encoder = Stream::new_easy_encoder(lvl, Check::Crc64)?; 215 | 216 | match encoder.process(payload, compressed_buf, Action::Run)? { 217 | // Treat partial consumption as buffer-too-small. 218 | Status::Ok if encoder.total_in() == payload.len() as u64 => {} 219 | Status::Ok | Status::MemNeeded => return Ok(None), 220 | Status::StreamEnd | Status::GetCheck => unreachable!(), 221 | } 222 | match encoder.process( 223 | &[], 224 | &mut compressed_buf[encoder.total_out() as usize..], 225 | Action::Finish, 226 | )? { 227 | Status::StreamEnd => {} 228 | Status::MemNeeded => return Ok(None), 229 | Status::Ok | Status::GetCheck => unreachable!(), 230 | } 231 | 232 | Ok::<_, std::io::Error>(Some(encoder.total_out() as usize)) 233 | })() 234 | .map_err(ErrorInner::Compress)? 235 | { 236 | break 'compressed (CompressAlgo::LZMA, compressed_len); 237 | } 238 | } 239 | } 240 | compressed_buf.copy_from_slice(payload); 241 | (CompressAlgo::NONE, payload.len()) 242 | }; 243 | buf.truncate(size_of::<Header>
() + compressed_len); 244 | let (header_buf, compressed_buf) = buf.split_at_mut(size_of::<Header>
()); 245 | 246 | let mut header = Header { 247 | magic_version: MagicVersion::LATEST, 248 | slow_hash: [0u8; 32], 249 | fast_hash: [0u8; 8], 250 | section_number: section_number.into(), 251 | section_type, 252 | compress_algo, 253 | payload_size: 0.into(), 254 | }; 255 | header.update_size_and_checksum(compressed_buf); 256 | header_buf.copy_from_slice(header.as_bytes()); 257 | 258 | Ok(buf) 259 | } 260 | 261 | /// Write metadata sections `METADATA_V2{,_SCHEMA}`. 262 | pub fn write_metadata_sections( 263 | &mut self, 264 | metadata: &dwarfs::metadata::Metadata, 265 | compression: CompressParam, 266 | ) -> Result<()> { 267 | let (schema, metadata_bytes) = metadata.to_schema_and_bytes()?; 268 | let schema_bytes = schema.to_bytes()?; 269 | self.write_section(SectionType::METADATA_V2_SCHEMA, compression, &schema_bytes)?; 270 | self.write_section(SectionType::METADATA_V2, compression, &metadata_bytes) 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /dwarfs-enc/tests/basic.rs: -------------------------------------------------------------------------------- 1 | use dwarfs::{Archive, AsChunks, InodeKind, archive::IsInode}; 2 | use dwarfs_enc::{ 3 | chunker::{BasicChunker, Chunker}, 4 | metadata::{Builder, InodeMetadata}, 5 | section::{CompressParam, Writer}, 6 | }; 7 | 8 | const META: InodeMetadata = InodeMetadata::new(0o777); 9 | 10 | fn build_with(f: impl FnOnce(&mut Builder, &mut dyn Chunker) -> dwarfs_enc::Result<()>) -> Vec<u8> { 11 | let buf = Vec::new(); 12 | let writer = Writer::new(buf).unwrap(); 13 | let mut builder = Builder::new(&META); 14 | let mut chunker = BasicChunker::new(writer, builder.block_size(), CompressParam::Zstd(3)); 15 | f(&mut builder, &mut chunker).unwrap(); 16 | let mut writer = chunker.finish().unwrap(); 17 | writer 18 | .write_metadata_sections(&builder.finish().unwrap(), CompressParam::Zstd(3)) 19 | .unwrap(); 20 | writer.finish().unwrap() 21 | } 22 | 23 | #[test] 24 | fn empty() { 25 | let b = build_with(|_meta, _chunker| Ok(())); 26 | let (index, _archive) = Archive::new(b).unwrap(); 27 | assert_eq!(index.root().entries().len(), 0); 28 | assert_eq!(index.inodes().len(), 1); 29 | assert_eq!(index.directories().len(), 1); 30 | } 31 | 32 | #[test] 33 | fn smoke() { 34 | let b = build_with(|meta, _chunker| { 35 | let root = meta.root(); 36 | meta.put_dir(root, "0dir", &META)?; 37 | let f = meta.put_file(root, "1file", &META, [])?; 38 | meta.put_symlink(root, "2symlink", &META, "target")?; 39 | meta.put_block_device(root, "3blkdev", &META, 0xDEAD_BEEF_DEAD_BEEF)?; 40 | meta.put_char_device(root, "4chardev", &META, 0xBEEF_DEAD_BEEF_DEAD)?; 41 | meta.put_fifo(root, "5fifo", &META)?; 42 | meta.put_socket(root, "6socket", &META)?; 43 | meta.put_hard_link(root, "7hardlink", f)?; 44 | Ok(()) 45 | }); 46 | 47 | let (index, _archive) = Archive::new(b).unwrap(); 48 | let (children, names) = index 49 | .root() 50 | .entries() 51 | .map(|ent| (ent.inode().classify(), ent.name())) 52 | .unzip::<_, _, Vec<_>, Vec<_>>(); 53 | 54 | assert_eq!( 55 | names, 56 | vec![ 57 | "0dir", 58 | "1file", 59 | "2symlink", 60 | "3blkdev", 61 | "4chardev", 62 | "5fifo", 63 | "6socket", 64 | "7hardlink", 65 | ] 66 | ); 67 | 68 | assert!(matches!(children[0], InodeKind::Directory(_))); 69 | assert!(matches!(children[1], InodeKind::File(i) if i.as_chunks().len() == 0)); 70 | assert!(matches!(children[2], InodeKind::Symlink(i) if i.target() == "target")); 71 | assert!(matches!(children[3], InodeKind::Device(i) if i.device_id() ==
0xDEAD_BEEF_DEAD_BEEF)); 72 | assert!(matches!(children[4], InodeKind::Device(i) if i.device_id() == 0xBEEF_DEAD_BEEF_DEAD)); 73 | assert!(matches!(children[5], InodeKind::Ipc(_))); 74 | assert!(matches!(children[6], InodeKind::Ipc(_))); 75 | assert_eq!(children[7].inode_num(), children[1].inode_num()); 76 | } 77 | -------------------------------------------------------------------------------- /dwarfs-test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dwarfs-test" 3 | version = "0.0.0" 4 | edition = "2024" 5 | publish = false 6 | license = "MIT OR Apache-2.0" 7 | 8 | [dependencies] 9 | dwarfs = { path = "../dwarfs", features = ["serialize", "lzma", "lz4"] } 10 | env_logger = "0.11.8" 11 | tempfile = "3.20.0" 12 | xshell = "0.2.7" 13 | 14 | [target.'cfg(unix)'.dependencies] 15 | # use-libc is necessary to support fakeroot. 16 | rustix = { version = "1.0.7", features = ["fs", "process", "use-libc"] } 17 | 18 | [dev-dependencies] 19 | hex = "0.4.3" 20 | sha2 = "0.10.9" 21 | -------------------------------------------------------------------------------- /dwarfs-test/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /dwarfs-test/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /dwarfs-test/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod mtree; 2 | pub mod traverse; 3 | -------------------------------------------------------------------------------- /dwarfs-test/src/main.rs: -------------------------------------------------------------------------------- 1 | use xshell::{Shell, cmd}; 2 | 3 | fn main() { 4 | env_logger::init(); 5 | let args = std::env::args().collect::<Vec<_>>(); 6 | let output = match &*args { 7 | [_, cmd, output] if cmd == "gen-privileged-archive" => output, 8 | _ => panic!("invalid argument"), 9 | }; 10 | 11 | #[cfg(not(unix))] 12 | { 13 | let _ = output; 14 | panic!("only UNIX platforms are supported"); 15 | } 16 | 17 | // Used by `../tests/tests/rs`.
18 | #[cfg(unix)] 19 | { 20 | use rustix::fs as ufs; 21 | 22 | assert!( 23 | rustix::process::geteuid().is_root(), 24 | "gen-privileged-archive must be executed under root or 'fakeroot'", 25 | ); 26 | 27 | let sh = Shell::new().unwrap(); 28 | let temp_dir = tempfile::tempdir().expect("failed to create tempdir"); 29 | let src_path = temp_dir.path().join("root"); 30 | std::fs::create_dir(&src_path).unwrap(); 31 | ufs::mknodat( 32 | ufs::ABS, 33 | src_path.join("bdev"), 34 | ufs::FileType::BlockDevice, 35 | ufs::Mode::from_bits_truncate(0o777), 36 | 0x0123_4567_89AB_CDEF, 37 | ) 38 | .unwrap(); 39 | ufs::mknodat( 40 | ufs::ABS, 41 | src_path.join("cdev"), 42 | ufs::FileType::CharacterDevice, 43 | ufs::Mode::from_bits_truncate(0o777), 44 | 0xFEDC_BA98_7654_3210, 45 | ) 46 | .unwrap(); 47 | 48 | cmd!( 49 | sh, 50 | "mkdwarfs -i {src_path} -o {output} --no-progress --log-level=error --with-devices" 51 | ) 52 | .run() 53 | .expect("failed to run 'mkdwarfs'"); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /dwarfs-test/src/mtree.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Result, Write}; 2 | 3 | use dwarfs::{ArchiveIndex, AsChunks, Dir, InodeKind}; 4 | 5 | pub fn dump(w: &mut dyn Write, index: &ArchiveIndex) -> Result<()> { 6 | writeln!(w, "#mtree")?; 7 | dump_dir(w, index.root(), &mut String::from(".")) 8 | } 9 | 10 | /// mtree escapes '/' and non-printable chars as `\ooo`. 11 | /// See: 12 | fn escape_into(buf: &mut String, s: &str) { 13 | for &b in s.as_bytes() { 14 | // ASCII printables. 15 | if (33..=126).contains(&b) && !b"\\/#".contains(&b) { 16 | buf.push(b as char); 17 | } else { 18 | buf.push('\\'); 19 | let digit = |x: u8| (b'0' + x) as char; 20 | buf.push(digit(b / 64)); 21 | buf.push(digit(b / 8 % 8)); 22 | buf.push(digit(b % 8)); 23 | } 24 | } 25 | } 26 | 27 | fn dump_dir(w: &mut dyn Write, dir: Dir<'_>, path: &mut String) -> Result<()> { 28 | for only_dir in [false, true] { 29 | for ent in dir.entries() { 30 | let name = ent.name(); 31 | let ino = ent.inode(); 32 | let prev_len = path.len(); 33 | path.push('/'); 34 | escape_into(path, name); 35 | 36 | let meta = ino.metadata(); 37 | let mtime = meta.mtime(); 38 | let mode = meta.file_type_mode().permission_bits(); 39 | let gid = meta.gid(); 40 | let uid = meta.uid(); 41 | if let Some(d) = ino.as_dir() { 42 | if only_dir { 43 | writeln!( 44 | w, 45 | "{path} time={mtime}.0 mode={mode:03o} gid={gid} uid={uid} type=dir", 46 | )?; 47 | dump_dir(w, d, path)?; 48 | } 49 | } else if !only_dir { 50 | if let Some(f) = ino.as_file() { 51 | let size = f.as_chunks().total_size(); 52 | writeln!( 53 | w, 54 | "{path} time={mtime}.0 mode={mode:03o} gid={gid} uid={uid} type=file size={size}", 55 | )?; 56 | } else { 57 | let kind = ino.classify(); 58 | if let InodeKind::Symlink(sym) = kind { 59 | let tgt = sym.target(); 60 | writeln!( 61 | w, 62 | "{path} time={mtime}.0 mode={mode:03o} gid={gid} uid={uid} type=link link={tgt}", 63 | )?; 64 | } else { 65 | unimplemented!("{kind:?}"); 66 | } 67 | } 68 | } 69 | 70 | path.truncate(prev_len); 71 | } 72 | } 73 | Ok(()) 74 | } 75 | -------------------------------------------------------------------------------- /dwarfs-test/src/traverse.rs: -------------------------------------------------------------------------------- 1 | use dwarfs::{ArchiveIndex, AsChunks, File}; 2 | 3 | /// Traverse all files in file offset order, for efficient content access. 
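A worked example of the `\ooo` escaping implemented in `escape_into` above: each escaped byte is emitted as three octal digits.

```rust
fn main() {
    let b = b'/'; // 0x2F = 0o057; '/' must always be escaped in mtree paths
    assert_eq!((b / 64, b / 8 % 8, b % 8), (0, 5, 7)); // the three octal digits
    // So a name like "a/b" is emitted as "a\057b".
}
```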
4 | pub fn traverse_files(index: &ArchiveIndex) -> Vec<(String, File<'_>)> { 5 | let mut files = Vec::with_capacity(index.inodes().len() - index.directories().len()); 6 | let mut queue = Vec::new(); 7 | queue.push((String::new(), index.root())); 8 | 9 | while let Some((mut path, dir)) = queue.pop() { 10 | path.push('/'); 11 | let prev_len = path.len(); 12 | 13 | for ent in dir.entries() { 14 | let name = ent.name(); 15 | let ino = ent.inode(); 16 | path.push_str(name); 17 | 18 | if let Some(d) = ino.as_dir() { 19 | queue.push((path.clone(), d)); 20 | } else if let Some(f) = ino.as_file() { 21 | let start_sec_idx = f.as_chunks().next().map_or(0, |c| c.section_idx()); 22 | files.push((start_sec_idx, path.clone(), f)); 23 | } 24 | 25 | path.truncate(prev_len); 26 | } 27 | } 28 | 29 | files.sort_by_key(|(sec_idx, ..)| *sec_idx); 30 | 31 | files.into_iter().map(|(_, path, f)| (path, f)).collect() 32 | } 33 | -------------------------------------------------------------------------------- /dwarfs-test/tests/basic.rs: -------------------------------------------------------------------------------- 1 | //! Basic functionality tests. 2 | use std::{ 3 | fs, 4 | io::BufRead, 5 | path::{Path, PathBuf}, 6 | time::{Duration, SystemTime}, 7 | }; 8 | 9 | use dwarfs::{ 10 | Archive, AsChunks, 11 | archive::{Config, IsInode, SectionIndexStrategy}, 12 | }; 13 | use xshell::{Shell, TempDir, cmd}; 14 | 15 | fn debug_print_mtree(sh: &Shell, archive_path: &Path) { 16 | let mtree_out = cmd!( 17 | sh, 18 | "dwarfsextract -i {archive_path} -f mtree --log-level=error" 19 | ) 20 | .read() 21 | .unwrap(); 22 | eprintln!("{mtree_out}"); 23 | } 24 | 25 | #[track_caller] 26 | fn build_archive(sh: &Shell, out: &str, opts: &str) -> PathBuf { 27 | let opts = opts.split_ascii_whitespace(); 28 | cmd!( 29 | sh, 30 | "mkdwarfs -i ./root -o {out} --no-progress --log-level=error {opts...}" 31 | ) 32 | .run() 33 | .unwrap(); 34 | debug_print_mtree(sh, out.as_ref()); 35 | sh.current_dir().join(out) 36 | } 37 | 38 | fn new_temp_shell() -> (Shell, TempDir) { 39 | let sh = Shell::new().unwrap(); 40 | let temp_dir = sh.create_temp_dir().unwrap(); 41 | sh.change_dir(temp_dir.path()); 42 | (sh, temp_dir) 43 | } 44 | 45 | #[test] 46 | fn empty() { 47 | let (sh, _temp_dir) = new_temp_shell(); 48 | sh.create_dir("root").unwrap(); 49 | let archive_path = build_archive(&sh, "img.dwarfs", "--set-time=42"); 50 | let (index, _archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 51 | 52 | let root = index.root(); 53 | assert_eq!(root.inode_num(), 0); 54 | assert_eq!(index.inodes().len(), 1); 55 | assert_eq!(index.inodes().next().unwrap().inode_num(), root.inode_num()); 56 | assert_eq!(index.directories().len(), 1); 57 | assert_eq!( 58 | index.directories().next().unwrap().inode_num(), 59 | root.inode_num() 60 | ); 61 | 62 | assert_eq!(root.entries().len(), 0); 63 | assert!(root.get("").is_none()); 64 | 65 | assert_eq!(index.get_inode(0).unwrap().inode_num(), 0); 66 | 67 | assert_eq!( 68 | index 69 | .get_path(std::iter::empty::<&str>()) 70 | .unwrap() 71 | .inode_num(), 72 | root.inode_num() 73 | ); 74 | 75 | let meta = root.metadata(); 76 | assert_eq!(meta.mtime(), 42); 77 | assert_eq!(meta.atime(), None); 78 | assert_eq!(meta.ctime(), None); 79 | #[cfg(unix)] 80 | assert_eq!( 81 | meta.file_type_mode().type_bits(), 82 | rustix::fs::FileType::Directory.as_raw_mode(), 83 | ); 84 | } 85 | 86 | #[test] 87 | fn basics() { 88 | let (sh, _temp_dir) = new_temp_shell(); 89 | sh.create_dir("root").unwrap(); 90 | 
sh.create_dir("root/dir1").unwrap(); 91 | sh.create_dir("root/dir2").unwrap(); 92 | sh.write_file("root/dir2/foo.txt", "bar").unwrap(); 93 | sh.write_file("root/empty", "").unwrap(); 94 | fs::File::open(sh.current_dir().join("root/dir2/foo.txt")) 95 | .unwrap() 96 | .set_times( 97 | fs::FileTimes::new() 98 | .set_modified(SystemTime::UNIX_EPOCH + Duration::from_secs(42)) 99 | .set_accessed(SystemTime::UNIX_EPOCH + Duration::from_secs(666)), 100 | ) 101 | .unwrap(); 102 | let archive_path = build_archive(&sh, "img.dwarfs", "--keep-all-times"); 103 | 104 | let (index, mut archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 105 | let root = index.root(); 106 | assert_eq!( 107 | root.entries().map(|ent| ent.name()).collect::>(), 108 | ["dir1", "dir2", "empty"], 109 | ); 110 | 111 | let dir1 = root.get("dir1").unwrap().inode().as_dir().unwrap(); 112 | assert_eq!(dir1.entries().len(), 0); 113 | 114 | let empty = root.get("empty").unwrap().inode().as_file().unwrap(); 115 | assert_eq!(empty.read_to_vec(&mut archive).unwrap(), []); 116 | assert_eq!(empty.as_chunks().len(), 0); 117 | assert_eq!(empty.as_chunks().total_size(), 0); 118 | assert_eq!(empty.as_reader(&mut archive).total_size(), 0); 119 | assert_eq!(empty.as_reader(&mut archive).fill_buf().unwrap(), []); 120 | 121 | let dir2 = root.get("dir2").unwrap().inode().as_dir().unwrap(); 122 | let foo = dir2.get("foo.txt").unwrap().inode(); 123 | let foo2 = index.get_path(["dir2", "foo.txt"]).unwrap(); 124 | assert_eq!(foo.inode_num(), foo2.inode_num()); 125 | let foo = foo.as_file().unwrap(); 126 | 127 | assert_eq!(foo.as_chunks().len(), 1); 128 | assert_eq!(foo.as_chunks().total_size(), 3); 129 | assert_eq!( 130 | foo.as_chunks() 131 | .next() 132 | .unwrap() 133 | .read_cached(&mut archive) 134 | .unwrap(), 135 | b"bar" 136 | ); 137 | assert_eq!(foo.read_to_vec(&mut archive).unwrap(), b"bar"); 138 | 139 | let meta = foo.metadata(); 140 | assert_eq!(meta.mtime(), 42); 141 | assert_eq!(meta.atime(), Some(666)); 142 | assert!(meta.ctime().is_some()); 143 | #[cfg(unix)] 144 | assert_eq!( 145 | meta.file_type_mode().type_bits(), 146 | rustix::fs::FileType::RegularFile.as_raw_mode(), 147 | ); 148 | } 149 | 150 | #[cfg(unix)] 151 | #[test] 152 | fn unix_specials() { 153 | use dwarfs::InodeKind; 154 | use rustix::fs::{self as ufs, FileType, Mode}; 155 | 156 | let (sh, _temp_dir) = new_temp_shell(); 157 | let src_path = sh.create_dir("root").unwrap(); 158 | ufs::symlink("/absolute/path", src_path.join("abs")).unwrap(); 159 | ufs::symlink("/absolute/path", src_path.join("dup")).unwrap(); 160 | ufs::symlink("../relative/path", src_path.join("rel")).unwrap(); 161 | 162 | // Do not mask. We make assertions on permissions below. 
163 | rustix::process::umask(Mode::empty()); 164 | 165 | ufs::mkdir( 166 | src_path.join("sticky"), 167 | Mode::RWXU | Mode::XOTH | Mode::SVTX, 168 | ) 169 | .unwrap(); 170 | ufs::mknodat( 171 | ufs::ABS, 172 | src_path.join("pipe"), 173 | FileType::Fifo, 174 | Mode::RWXU | Mode::SUID, 175 | 0, 176 | ) 177 | .unwrap(); 178 | ufs::mknodat( 179 | ufs::ABS, 180 | src_path.join("sock"), 181 | FileType::Socket, 182 | Mode::RWXG | Mode::SGID, 183 | 0, 184 | ) 185 | .unwrap(); 186 | 187 | let archive_path = build_archive(&sh, "img.dwarfs", "--with-specials"); 188 | let (index, _archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 189 | let root = index.root(); 190 | 191 | assert!(matches!(root.get("abs").unwrap().inode().classify(), 192 | InodeKind::Symlink(f) if f.target() == "/absolute/path")); 193 | assert!(matches!(root.get("dup").unwrap().inode().classify(), 194 | InodeKind::Symlink(f) if f.target() == "/absolute/path")); 195 | assert!(matches!(root.get("rel").unwrap().inode().classify(), 196 | InodeKind::Symlink(f) if f.target() == "../relative/path")); 197 | 198 | let sticky = root.get("sticky").unwrap().inode().as_dir().unwrap(); 199 | let sticky_mode = sticky.metadata().file_type_mode(); 200 | assert_eq!( 201 | FileType::from_raw_mode(sticky_mode.type_bits()), 202 | FileType::Directory 203 | ); 204 | assert_eq!( 205 | Mode::from_bits(sticky_mode.mode_bits()), 206 | Some(Mode::RWXU | Mode::XOTH | Mode::SVTX) 207 | ); 208 | 209 | let pipe = root.get("pipe").unwrap().inode(); 210 | let pipe_mode = pipe.metadata().file_type_mode(); 211 | assert!(matches!(pipe.classify(), InodeKind::Ipc(_))); 212 | assert_eq!( 213 | FileType::from_raw_mode(pipe_mode.type_bits()), 214 | FileType::Fifo, 215 | ); 216 | assert_eq!( 217 | Mode::from_bits(pipe_mode.mode_bits()), 218 | Some(Mode::RWXU | Mode::SUID) 219 | ); 220 | 221 | let sock = root.get("sock").unwrap().inode(); 222 | let sock_mode = sock.metadata().file_type_mode(); 223 | assert!(matches!(sock.classify(), InodeKind::Ipc(_))); 224 | assert_eq!( 225 | FileType::from_raw_mode(sock_mode.type_bits()), 226 | FileType::Socket, 227 | ); 228 | assert_eq!( 229 | Mode::from_bits(sock_mode.mode_bits()), 230 | Some(Mode::RWXG | Mode::SGID) 231 | ); 232 | } 233 | 234 | #[cfg(unix)] 235 | #[test] 236 | fn unix_devices() { 237 | use dwarfs::InodeKind; 238 | use rustix::fs::FileType; 239 | 240 | let (sh, _temp_dir) = new_temp_shell(); 241 | let exe = env!("CARGO_BIN_EXE_dwarfs-test"); 242 | cmd!(sh, "fakeroot -- {exe} gen-privileged-archive img.dwarfs") 243 | .run() 244 | .unwrap(); 245 | let archive_path = sh.current_dir().join("img.dwarfs"); 246 | debug_print_mtree(&sh, &archive_path); 247 | 248 | let (index, _archive) = Archive::new(fs::File::open(&archive_path).unwrap()).unwrap(); 249 | let root = index.root(); 250 | 251 | let bdev = root.get("bdev").unwrap().inode(); 252 | let InodeKind::Device(bdev) = bdev.classify() else { 253 | panic!("wrong file type") 254 | }; 255 | assert_eq!(bdev.device_id(), 0x0123_4567_89AB_CDEF); 256 | assert_eq!( 257 | FileType::from_raw_mode(bdev.metadata().file_type_mode().type_bits()), 258 | FileType::BlockDevice, 259 | ); 260 | 261 | let cdev = root.get("cdev").unwrap().inode(); 262 | let InodeKind::Device(cdev) = cdev.classify() else { 263 | panic!("wrong file type") 264 | }; 265 | assert_eq!(cdev.device_id(), 0xFEDC_BA98_7654_3210); 266 | assert_eq!( 267 | FileType::from_raw_mode(cdev.metadata().file_type_mode().type_bits()), 268 | FileType::CharacterDevice, 269 | ); 270 | } 271 | 272 | #[test] 273 | fn 
section_index() { 274 | let (sh, _temp_dir) = new_temp_shell(); 275 | sh.create_dir("root").unwrap(); 276 | 277 | let load = |f: &Path, strategy: SectionIndexStrategy| { 278 | Archive::new_with_config( 279 | fs::File::open(f).unwrap(), 280 | Config::default().section_index_strategy(strategy), 281 | ) 282 | }; 283 | 284 | let with_index = build_archive(&sh, "with_index.dwarfs", ""); 285 | load(&with_index, SectionIndexStrategy::UseEmbeddedIfExists).unwrap(); 286 | load(&with_index, SectionIndexStrategy::Build).unwrap(); 287 | load(&with_index, SectionIndexStrategy::UseEmbedded).unwrap(); 288 | 289 | let no_index = build_archive(&sh, "no_index.dwarfs", "--no-section-index"); 290 | load(&no_index, SectionIndexStrategy::UseEmbeddedIfExists).unwrap(); 291 | load(&no_index, SectionIndexStrategy::Build).unwrap(); 292 | 293 | let err = load(&no_index, SectionIndexStrategy::UseEmbedded).unwrap_err(); 294 | assert_eq!(err.to_string(), "missing section SECTION_INDEX"); 295 | } 296 | 297 | #[test] 298 | fn packed_metadata() { 299 | let (sh, _temp_dir) = new_temp_shell(); 300 | let src_dir = sh.create_dir("root").unwrap(); 301 | sh.create_dir("root/foo/foo/baz").unwrap(); 302 | sh.write_file("root/foo/baz", "hello world").unwrap(); 303 | sh.write_file("root/baz", "").unwrap(); 304 | 305 | #[cfg(unix)] 306 | { 307 | use std::os::unix::fs as ufs; 308 | ufs::symlink("foo", src_dir.join("sym1")).unwrap(); 309 | ufs::symlink("foo", src_dir.join("sym2")).unwrap(); 310 | ufs::symlink("bar", src_dir.join("sym3")).unwrap(); 311 | } 312 | 313 | let pack_none = build_archive(&sh, "none.dwarfs", "--pack-metadata=none"); 314 | Archive::new(fs::File::open(pack_none).unwrap()).unwrap(); 315 | 316 | let pack_all = build_archive(&sh, "all.dwarfs", "--pack-metadata=all"); 317 | Archive::new(fs::File::open(pack_all).unwrap()).unwrap(); 318 | } 319 | 320 | #[test] 321 | fn symtab() { 322 | let (sh, _temp_dir) = new_temp_shell(); 323 | let names = (0..32) 324 | .map(|i| format!("a_very_common_prefix.{i:02}.txt")) 325 | // Single occurrence byte. 
326 | .chain(["~".to_owned()]) 327 | .collect::>(); 328 | 329 | for name in &names { 330 | sh.write_file(format!("root/{name}"), "").unwrap(); 331 | } 332 | 333 | let archive_path = build_archive(&sh, "img.dwarfs", "--pack-metadata=names,force"); 334 | let (index, _) = Archive::new(fs::File::open(archive_path).unwrap()).unwrap(); 335 | let root = index.root(); 336 | assert_eq!( 337 | root.entries().map(|ent| ent.name()).collect::>(), 338 | names, 339 | ); 340 | } 341 | 342 | #[test] 343 | fn shared_files() { 344 | let (sh, _temp_dir) = new_temp_shell(); 345 | let content = (0..1024) 346 | .map(|i| format!("{i:04}")) 347 | .collect::() 348 | .into_bytes(); 349 | sh.write_file("root/a.txt", &content).unwrap(); 350 | sh.write_file("root/b.txt", &content).unwrap(); 351 | 352 | let archive_path = build_archive(&sh, "img.dwarfs", "--pack-metadata=shared_files,force"); 353 | let (index, mut archive) = Archive::new(fs::File::open(archive_path).unwrap()).unwrap(); 354 | let root = index.root(); 355 | let a = root.get("a.txt").unwrap().inode().as_file().unwrap(); 356 | let b = root.get("b.txt").unwrap().inode().as_file().unwrap(); 357 | 358 | assert_eq!(a.as_chunks().len(), 1); 359 | assert_eq!(b.as_chunks().len(), 1); 360 | assert_eq!( 361 | a.as_chunks().next().unwrap().offset(), 362 | b.as_chunks().next().unwrap().offset(), 363 | ); 364 | 365 | assert_eq!(a.read_to_vec(&mut archive).unwrap(), content); 366 | assert_eq!(b.read_to_vec(&mut archive).unwrap(), content); 367 | } 368 | -------------------------------------------------------------------------------- /dwarfs-test/tests/large.rs: -------------------------------------------------------------------------------- 1 | //! Large tests on real production archives. 2 | use std::{ 3 | io::{BufRead, Seek, SeekFrom, Write}, 4 | sync::LazyLock, 5 | time::Instant, 6 | }; 7 | 8 | use dwarfs::{ 9 | Archive, AsChunks, 10 | metadata::{Metadata, Schema}, 11 | positioned_io::ReadAt, 12 | section::{CompressAlgo, Header, MagicVersion, SectionIndexEntry, SectionReader, SectionType}, 13 | }; 14 | use tempfile::NamedTempFile; 15 | use xshell::{Shell, cmd}; 16 | 17 | static TEST_FILES: LazyLock> = LazyLock::new(|| { 18 | std::env::var("DWARFS_LARGE_TEST_FILES") 19 | .expect("DWARFS_LARGE_TEST_FILES is not set") 20 | .split_ascii_whitespace() 21 | .map(Into::into) 22 | .collect() 23 | }); 24 | 25 | fn with_tests(mut f: impl FnMut(&str, std::fs::File)) { 26 | for path in &*TEST_FILES { 27 | eprintln!("Testing {path}"); 28 | let file = std::fs::File::open(path).unwrap(); 29 | f(path, file); 30 | } 31 | } 32 | 33 | fn read_section_by_type( 34 | rdr: &mut SectionReader, 35 | sec_index: &[SectionIndexEntry], 36 | typ: SectionType, 37 | ) -> Vec { 38 | let offset = sec_index 39 | .iter() 40 | .find_map(|i| (i.section_type() == typ).then_some(i.offset())) 41 | .expect("missing section"); 42 | let (_, bytes) = rdr 43 | .read_section_at(offset, 16 << 20) 44 | .expect("failed to read section"); 45 | bytes 46 | } 47 | 48 | /// Update the schema and metadata section of an existing DwarFS archive. 49 | fn patch_schema_and_metadata( 50 | mut orig_file: &std::fs::File, 51 | index: &[SectionIndexEntry], 52 | schema_bytes: &[u8], 53 | metadata_bytes: &[u8], 54 | ) -> NamedTempFile { 55 | // For typical archives, all non-BLOCK sections are at the end, after all BLOCK sections. 56 | let data_sections = index 57 | .iter() 58 | .position(|&ent| ent.section_type() != SectionType::BLOCK) 59 | .unwrap(); 60 | assert!( 61 | index[data_sections..] 
62 | .iter() 63 | .all(|ent| ent.section_type() != SectionType::BLOCK) 64 | ); 65 | let data_end_pos = index[data_sections].offset(); 66 | 67 | let mut patched_file = NamedTempFile::new().unwrap(); 68 | let fout = patched_file.as_file_mut(); 69 | std::io::copy(&mut orig_file, fout).unwrap(); 70 | fout.set_len(data_end_pos).unwrap(); 71 | fout.seek(SeekFrom::End(0)).unwrap(); 72 | 73 | for (i, typ, payload) in [ 74 | (0, SectionType::METADATA_V2_SCHEMA, schema_bytes), 75 | (1, SectionType::METADATA_V2, metadata_bytes), 76 | ] { 77 | write_section(fout, data_sections as u32 + i, typ, payload).unwrap(); 78 | } 79 | 80 | patched_file 81 | } 82 | 83 | fn write_section( 84 | w: &mut dyn Write, 85 | section_num: u32, 86 | typ: SectionType, 87 | payload: &[u8], 88 | ) -> std::io::Result<()> { 89 | use dwarfs::zerocopy::IntoBytes; 90 | 91 | let mut header = Header { 92 | magic_version: MagicVersion::LATEST, 93 | slow_hash: [0; 32], 94 | fast_hash: [0; 8], 95 | section_number: section_num.into(), 96 | section_type: typ, 97 | compress_algo: CompressAlgo::NONE, 98 | payload_size: 0.into(), 99 | }; 100 | header.update_size_and_checksum(payload); 101 | w.write_all(header.as_bytes())?; 102 | w.write_all(payload) 103 | } 104 | 105 | fn test_reserialize(schema_only: bool) { 106 | let sh = Shell::new().unwrap(); 107 | 108 | with_tests(|orig_path, file| { 109 | let dump1 = cmd!(sh, "dwarfsck -i {orig_path} -d metadata_full_dump") 110 | .read() 111 | .unwrap(); 112 | 113 | let file_size = file.metadata().expect("failed to get file size").len(); 114 | let mut rdr = SectionReader::new(file); 115 | let (_, sec_index) = rdr 116 | .read_section_index(file_size, 16 << 20) 117 | .expect("failed to read section index") 118 | .expect("missing section index"); 119 | let mut schema_bytes = 120 | read_section_by_type(&mut rdr, &sec_index, SectionType::METADATA_V2_SCHEMA); 121 | let mut metadata_bytes = 122 | read_section_by_type(&mut rdr, &sec_index, SectionType::METADATA_V2); 123 | let schema = Schema::parse(&schema_bytes).expect("failed to parse schema"); 124 | 125 | if schema_only { 126 | let schema_ser = schema.to_bytes().unwrap(); 127 | let schema2 = Schema::parse(&schema_ser).unwrap(); 128 | assert_eq!(schema, schema2); 129 | schema_bytes = schema_ser; 130 | } else { 131 | let metadata = Metadata::parse(&schema, &metadata_bytes).unwrap(); 132 | let (schema2, metadata_ser) = metadata.to_schema_and_bytes().unwrap(); 133 | let metadata2 = Metadata::parse(&schema2, &metadata_ser).unwrap(); 134 | assert_eq!(metadata, metadata2); 135 | let schema_ser = schema2.to_bytes().unwrap(); 136 | (schema_bytes, metadata_bytes) = (schema_ser, metadata_ser); 137 | } 138 | 139 | let patched_file = 140 | patch_schema_and_metadata(rdr.get_ref(), &sec_index, &schema_bytes, &metadata_bytes); 141 | let patched_path = patched_file.path(); 142 | let dump2 = cmd!(sh, "dwarfsck -i {patched_path} -d metadata_full_dump") 143 | .read() 144 | .unwrap(); 145 | if dump1 != dump2 { 146 | std::fs::write("./result-metadata-dump-before.txt", &dump1).unwrap(); 147 | std::fs::write("./result-metadata-dump-after.txt", &dump2).unwrap(); 148 | panic!("metadata dump differs, results saved to result-metadata-dump-*.txt"); 149 | } 150 | }); 151 | } 152 | 153 | #[test] 154 | #[ignore = "large test"] 155 | fn schema_roundtrip() { 156 | test_reserialize(true); 157 | } 158 | 159 | #[test] 160 | #[ignore = "large test"] 161 | fn metadata_roundtrip() { 162 | test_reserialize(false); 163 | } 164 | 165 | #[test] 166 | #[ignore = "large test"] 167 | fn dump_mtree() { 
168 | let sh = Shell::new().unwrap(); 169 | with_tests(|path, file| { 170 | let expect = cmd!(sh, "dwarfsextract -i {path} -f mtree --log-level=error") 171 | .read() 172 | .unwrap(); 173 | let expect = expect.trim_ascii_end(); 174 | 175 | let mut got = Vec::new(); 176 | let (index, _archive) = Archive::new(file).unwrap(); 177 | dwarfs_test::mtree::dump(&mut got, &index).unwrap(); 178 | let actual = str::from_utf8(&got).unwrap().trim_ascii_end(); 179 | 180 | if actual != expect { 181 | std::fs::write("result-actual.mtree", actual).unwrap(); 182 | std::fs::write("result-expect.mtree", expect).unwrap(); 183 | panic!("mtree mismatch"); 184 | } 185 | }); 186 | } 187 | 188 | #[test] 189 | #[ignore = "large test"] 190 | fn dump_content() { 191 | use sha2::{Digest, Sha512_256}; 192 | assert!( 193 | !cfg!(debug_assertions), 194 | "requires '--release' or it will be too slow", 195 | ); 196 | 197 | let sh = Shell::new().unwrap(); 198 | with_tests(|archive_path, archive_file| { 199 | let inst = Instant::now(); 200 | let output = cmd!( 201 | sh, 202 | "dwarfsck --checksum=sha512-256 -i {archive_path} --log-level=error" 203 | ) 204 | .read() 205 | .unwrap(); 206 | eprintln!("dwarfsck completes in {:?}", inst.elapsed()); 207 | 208 | let mut expect = output 209 | .lines() 210 | .map(|line| line.split_once(" ").unwrap()) 211 | .collect::<Vec<_>>(); 212 | expect.sort_unstable_by_key(|(_, name)| *name); 213 | let expect = expect 214 | .iter() 215 | .flat_map(|(hash, path)| [hash, " ", path, "\n"]) 216 | .collect::<String>(); 217 | 218 | let inst = Instant::now(); 219 | let (index, mut archive) = Archive::new(archive_file).unwrap(); 220 | let mut actual = Vec::with_capacity(index.inodes().len()); 221 | let mut h = Sha512_256::new(); 222 | let files = dwarfs_test::traverse::traverse_files(&index); 223 | eprintln!("traversal completes in {:?}", inst.elapsed()); 224 | for (path, file) in files { 225 | let mut rdr = file.as_reader(&mut archive); 226 | loop { 227 | let buf = rdr.fill_buf().unwrap(); 228 | if buf.is_empty() { 229 | break; 230 | } 231 | h.update(buf); 232 | let len = buf.len(); 233 | rdr.consume(len); 234 | } 235 | let digest = hex::encode(h.finalize_reset().as_slice()); 236 | actual.push((digest, path)); 237 | } 238 | actual.sort_unstable_by(|(_, lhs), (_, rhs)| Ord::cmp(lhs, rhs)); 239 | let actual = actual 240 | .iter() 241 | // Exclude leading `/`. 242 | .flat_map(|(hash, path)| [hash, " ", &path[1..], "\n"]) 243 | .collect::<String>(); 244 | eprintln!("traversal+checksum completes in {:?}", inst.elapsed()); 245 | 246 | if actual != expect { 247 | std::fs::write("result-actual.cksum", actual).unwrap(); 248 | std::fs::write("result-expect.cksum", expect).unwrap(); 249 | panic!("results mismatch") 250 | } 251 | }); 252 | } 253 | -------------------------------------------------------------------------------- /dwarfs/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/) 6 | and this project adheres to [Semantic Versioning](https://semver.org/). 7 | 8 | ## v0.2.1 9 | 10 | ### Changed 11 | 12 | ### Added 13 | 14 | - FSST symtab serialization `fsst::to_bytes`. 15 | 16 | - DwarFS metadata serialization `Metadata::to_schema_and_bytes`. 17 | 18 | This implements basic serialization support for the Frozen format. It uses fixed-width 19 | integers and does not yet support bit-packing.
20 | 21 | - The previously missing `Metadata::reg_file_size_cache` field. 22 | 23 | - `section::Header::update_size_and_checksum` 24 | 25 | - `section::MagicVersion::LATEST` 26 | 27 | ### Others 28 | 29 | - Switch from `xz2` to `liblzma` crate for LZMA decompression. 30 | 31 | - Remove the unused high-level wrapper crate `zstd` and use `zstd-safe` directly. 32 | 33 | - Add more tests. 34 | 35 | ## v0.2.0 36 | 37 | ### Changed 38 | 39 | - `metadata::Schema::to_bytes` is now gated under a disabled-by-default 40 | feature `serialize`. 41 | 42 | - The `fsst` module is refactored. Fallible methods of `fsst::Decoder` now return 43 | `Result<_, fsst::Error>` instead of `Option<_>`. 44 | 45 | `Decoder::parse_symtab` has been renamed to `parse` for consistency. 46 | 47 | ### Added 48 | 49 | - Re-export of dependency `zerocopy`. 50 | - `section::Header::calculate_{fast,slow}_checksum`. 51 | 52 | ### Fixed 53 | 54 | - A bug causing any valid section index to be rejected. 55 | 56 | - False errors when loading empty archives. 57 | 58 | - Incorrect behavior of `Dir::get`. 59 | 60 | - An off-by-one bug when unpacking string tables. 61 | 62 | ### Others 63 | 64 | - Added more tests. 65 | 66 | ## v0.1.0 67 | 68 | Initial release. 69 | -------------------------------------------------------------------------------- /dwarfs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dwarfs" 3 | version = "0.2.1" 4 | edition = "2024" 5 | description = "A library for reading DwarFS archives (aka. DwarFS images)" 6 | license = "MIT OR Apache-2.0" 7 | keywords = ["dwarfs", "archive", "compression"] 8 | categories = ["compression", "filesystem"] 9 | repository = "https://github.com/oxalica/dwarfs-rs" 10 | 11 | [features] 12 | default = ["zstd", "log"] 13 | 14 | # Compression algorithm support. 15 | zstd = ["dep:zstd-safe"] 16 | lzma = ["dep:liblzma"] 17 | lz4 = ["dep:lz4"] 18 | 19 | # Extra functionalities.
20 | log = ["dep:log", "dep:measure_time"] 21 | serialize = ["dep:indexmap"] 22 | 23 | [dependencies] 24 | bstr = { version = "1.12.0", features = ["serde"] } 25 | indexmap = { version = "2.9.0", optional = true } 26 | log = { version = "0.4.27", optional = true } 27 | lru = "0.14.0" 28 | lz4 = { version = "1.28.1", optional = true } 29 | measure_time = { version = "0.9.0", optional = true } 30 | positioned-io = { version = "0.3.4", default-features = false } 31 | serde = { version = "1.0.219", features = ["derive"] } 32 | sha2 = "0.10.9" 33 | xxhash-rust = { version = "0.8.15", features = ["xxh3"] } 34 | liblzma = { version = "0.4.1", optional = true } 35 | zerocopy = { version = "0.8.25", features = ["derive", "std"] } 36 | zstd-safe = { version = "7.2.4", optional = true, default-features = false } 37 | 38 | [lints.clippy] 39 | dbg-macro = "warn" 40 | todo = "warn" 41 | print-stdout = "warn" 42 | print-stderr = "warn" 43 | -------------------------------------------------------------------------------- /dwarfs/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /dwarfs/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /dwarfs/README.md: -------------------------------------------------------------------------------- 1 | # dwarfs 2 | 3 | [![crates.io](https://img.shields.io/crates/v/dwarfs)](https://crates.io/crates/dwarfs) 4 | [![docs.rs](https://img.shields.io/docsrs/dwarfs)][docs] 5 | 6 | A library for reading [DwarFS][dwarfs] archives (aka. DwarFS images). 7 | 8 | See the [documentation][docs] for more details. 9 | 10 | [dwarfs]: https://github.com/mhx/dwarfs 11 | [docs]: https://docs.rs/dwarfs 12 | 13 | #### License 14 | 15 | 16 | Licensed under either of Apache License, Version 17 | 2.0 or MIT license at your option. 18 | 19 |
21 | 22 | 23 | Unless you explicitly state otherwise, any contribution intentionally submitted 24 | for inclusion in this crate by you, as defined in the Apache-2.0 license, shall 25 | be dual licensed as above, without any additional terms or conditions. 26 | 27 | -------------------------------------------------------------------------------- /dwarfs/src/fsst.rs: -------------------------------------------------------------------------------- 1 | //! The [Fast Static Symbol Table (FSST)][fsst] decoder for compressed string 2 | //! tables [`StringTable::symtab`][crate::metadata::StringTable::symtab]. 3 | //! 4 | //! [fsst]: https://github.com/cwida/fsst 5 | 6 | use std::fmt; 7 | 8 | use bstr::{BStr, BString}; 9 | use zerocopy::IntoBytes; 10 | 11 | type Sym = u64; 12 | 13 | const VERSION: u32 = 2019_0218; 14 | const SYM_CORRUPT: Sym = u64::from_ne_bytes(*b"corrupt\0"); 15 | 16 | /// The max length of one symbol. 17 | pub const MAX_SYMBOL_LEN: usize = 8; 18 | 19 | type Result<T, E = Error> = std::result::Result<T, E>; 20 | 21 | /// A symbol table decoding error. 22 | pub struct Error(ErrorInner); 23 | 24 | #[derive(Debug)] 25 | enum ErrorInner { 26 | InputEof, 27 | InvalidMagic, 28 | NulMode, 29 | CodeOverflow, 30 | 31 | BufTooSmall, 32 | InvalidEscape, 33 | InvalidSymbol, 34 | 35 | #[cfg(feature = "serialize")] 36 | IncorrectSymbolOrder, 37 | } 38 | 39 | impl fmt::Debug for Error { 40 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 41 | self.0.fmt(f) 42 | } 43 | } 44 | 45 | impl fmt::Display for Error { 46 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 47 | f.pad(match self.0 { 48 | ErrorInner::InputEof => "unexpected end of input", 49 | ErrorInner::InvalidMagic => "missing header magic", 50 | ErrorInner::NulMode => "unsupported null-terminated mode", 51 | ErrorInner::CodeOverflow => "too many symbols", 52 | ErrorInner::BufTooSmall => "output buffer is too small", 53 | ErrorInner::InvalidEscape => "invalid escape byte at the end of input", 54 | ErrorInner::InvalidSymbol => "invalid symbol", 55 | #[cfg(feature = "serialize")] 56 | ErrorInner::IncorrectSymbolOrder => "symbols must be ordered in length 2,3,4,5,6,7,8,1", 57 | }) 58 | } 59 | } 60 | 61 | impl std::error::Error for Error {} 62 | 63 | impl From<ErrorInner> for Error { 64 | #[cold] 65 | #[inline] 66 | fn from(err: ErrorInner) -> Self { 67 | Self(err) 68 | } 69 | } 70 | 71 | /// The Fast Static Symbol Table (FSST) decoder. 72 | /// 73 | /// See the [module level documentation](self). Note that this struct contains a 74 | /// ~2KiB array, and you may want to box it to keep moves cheap. 75 | pub struct Decoder { 76 | /// Code -> symbol mapping, stored in native-endian, with trailing bytes filled by NUL. 77 | symbols: [Sym; 255], 78 | } 79 | 80 | impl fmt::Debug for Decoder { 81 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 82 | struct SymbolMap<'a>(&'a Decoder); 83 | 84 | impl fmt::Debug for SymbolMap<'_> { 85 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 86 | f.debug_map() 87 | .entries(self.0.symbols.iter().enumerate().map(|(i, sym)| { 88 | let len = Decoder::symbol_len(*sym); 89 | let sym = &sym.as_bytes()[..len]; 90 | (i, BStr::new(sym)) 91 | })) 92 | .finish() 93 | } 94 | } 95 | 96 | f.debug_struct("Decoder") 97 | .field("symbols", &SymbolMap(self)) 98 | .finish() 99 | } 100 | } 101 | 102 | impl Decoder { 103 | /// The max length of one symbol.
104 | pub const MAX_SYMBOL_LEN: usize = MAX_SYMBOL_LEN; 105 | 106 | const ALL_CORRUPT: Self = Decoder { 107 | symbols: [SYM_CORRUPT; 255], 108 | }; 109 | 110 | /// Iterate over `len_histo` for (symbol_length, count). 111 | fn len_histo_iter(histo: &[u8; 8]) -> impl Iterator<Item = (usize, u8)> { 112 | // Semantically: zip([2,3,4,5,6,7,8,1], histo[...[1,2,3,4,5,6,7,0]]) 113 | (1..=8).map(|i| ((i & 7) + 1, histo[i & 7])) 114 | } 115 | 116 | /// Parse the symbol table `symtab`, from libfsst's serialization format. 117 | /// 118 | /// This is re-implemented in Rust with reference to 119 | /// [libfsst's `fsst_import`](https://github.com/cwida/fsst/blob/b228af6356196095eaf9f8f5654b0635f969661e/libfsst.cpp#L555). 120 | /// 121 | /// Some notable differences: 122 | /// - More error checking on short inputs, no buffer overflow, because we're Rust. 123 | /// - More permissive on version endianness. Allow both little and big endian versions. 124 | /// Symbols are still always little-endian, as upstream. 125 | /// - Zero-terminated mode (NUL as the first symbol) is unsupported and rejected. 126 | /// - Encoder state bytes are ignored. 127 | /// - Trailing bytes are allowed but ignored. 128 | /// 129 | /// License of libfsst: MIT License, Copyright 2018-2020, CWI, TU Munich, FSU Jena 130 | /// 131 | /// # Errors 132 | /// 133 | /// Returns `Err` if the input cannot be successfully parsed. 134 | pub fn parse(bytes: &[u8]) -> Result<Self> { 135 | let mut this = Self::ALL_CORRUPT; 136 | 137 | let (&version_bytes, rest) = bytes.split_first_chunk::<8>().ok_or(ErrorInner::InputEof)?; 138 | let (&zero_terminated, rest) = rest.split_first().ok_or(ErrorInner::InputEof)?; 139 | let (&len_histo, rest) = rest.split_first_chunk::<8>().ok_or(ErrorInner::InputEof)?; 140 | 141 | // FIXME: This is in native endian, thus is non-portable and non-deterministic! 142 | // Here we use little-endian first, detect and fix the endianness by 143 | // using the fact the most-significant byte is always zero while the 144 | // least-significant byte is always non-zero. 145 | // Need further discussion with upstream. 146 | let mut version = u64::from_le_bytes(version_bytes); 147 | #[allow(clippy::verbose_bit_mask, reason = "less clear")] 148 | if version & 0xFF == 0 { 149 | version = version.swap_bytes(); 150 | } 151 | if version >> 32 != u64::from(VERSION) { 152 | return Err(ErrorInner::InvalidMagic.into()); 153 | } 154 | 155 | // Zero terminated flag is not supported. 156 | if zero_terminated & 1 != 0 { 157 | return Err(ErrorInner::NulMode.into()); 158 | } 159 | 160 | let mut code = 0; 161 | let mut pos = 0; 162 | for (sym_len, cnt) in Self::len_histo_iter(&len_histo) { 163 | for _ in 0..cnt { 164 | let mut sym = 0u64; 165 | // TODO: Bound check before? 166 | sym.as_mut_bytes()[..sym_len] 167 | .copy_from_slice(rest.get(pos..pos + sym_len).ok_or(ErrorInner::InputEof)?); 168 | *this.symbols.get_mut(code).ok_or(ErrorInner::CodeOverflow)? = sym; 169 | pos += sym_len; 170 | code += 1; 171 | } 172 | } 173 | 174 | Ok(this) 175 | } 176 | 177 | /// Return the max possible decoded length for an input of length `input_len`. 178 | #[inline] 179 | #[must_use] 180 | pub fn max_decode_len(input_len: usize) -> usize { 181 | // `usize::MAX` on overflow will guarantee an OOM on allocation.
input_len.checked_mul(8).unwrap_or(usize::MAX) 183 | } 184 | 185 | #[inline] 186 | fn symbol_len(sym: Sym) -> usize { 187 | if cfg!(target_endian = "little") { 188 | 8 - sym.leading_zeros() as usize / 8 189 | } else { 190 | 8 - sym.trailing_zeros() as usize / 8 191 | } 192 | } 193 | 194 | /// Decode `input` into `output` and return the decoded length. 195 | /// 196 | /// # Errors 197 | /// 198 | /// If `output.len() < Self::max_decode_len(input.len())`, or an error occurs 199 | /// during decoding, `Err` is returned. 200 | #[allow(clippy::missing_panics_doc, reason = "never panics")] 201 | pub fn decode_into(&self, input: &[u8], mut output: &mut [u8]) -> Result<usize> { 202 | if input.is_empty() { 203 | return Ok(0); 204 | } 205 | if output.len() < Self::max_decode_len(input.len()) { 206 | return Err(ErrorInner::BufTooSmall.into()); 207 | } 208 | if input.last() == Some(&0xFF) { 209 | return Err(ErrorInner::InvalidEscape.into()); 210 | } 211 | 212 | let prev_output_len = output.len(); 213 | let mut i = 0; 214 | // The second condition is a loop invariant, not an exit condition. 215 | while i < input.len() && output.len() >= MAX_SYMBOL_LEN { 216 | let b = input[i]; 217 | if b < 0xFF { 218 | let sym = self.symbols[b as usize]; 219 | if sym == 0 { 220 | return Err(ErrorInner::InvalidSymbol.into()); 221 | } 222 | // We always use max possible decode length, so output[..8] will never fail. 223 | *output.first_chunk_mut().expect("loop invariant") = sym.to_ne_bytes(); 224 | output = &mut output[Self::symbol_len(sym)..]; 225 | // This condition is always true due to the initial check, 226 | // but is here for better codegen. 227 | } else if i + 1 < input.len() { 228 | i += 1; 229 | output[0] = input[i]; 230 | output = &mut output[1..]; 231 | } 232 | i += 1; 233 | } 234 | Ok(prev_output_len - output.len()) 235 | } 236 | 237 | /// Decode `input` into an owned byte string. 238 | /// 239 | /// # Errors 240 | /// 241 | /// If an error occurs during decoding, `Err` is returned. 242 | pub fn decode(&self, input: &[u8]) -> Result<BString> { 243 | let mut buf = vec![0u8; Self::max_decode_len(input.len())]; 244 | let len = self.decode_into(input, &mut buf)?; 245 | buf.truncate(len); 246 | Ok(buf.into()) 247 | } 248 | } 249 | 250 | /// Serialize a symbol table consisting of the given symbols into bytes. 251 | /// 252 | /// `symbols` is an iterator of FSST symbols for code `0..`. It must be ordered 253 | /// in length `2,3,4,5,6,7,8,1`. 254 | /// 255 | /// # Errors 256 | /// 257 | /// Returns `Err` if: 258 | /// - `symbols` has more than 255 elements, or the symbols are not in the expected order. 259 | /// - A symbol has length outside range `1..=8`. 260 | /// - A symbol contains a zero (NUL) byte.
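/// /// # Example /// /// A small round-trip sketch, mirroring the `serialize` test below (the symbols are already in the required length order): /// /// ``` /// use dwarfs::fsst::{Decoder, to_bytes}; /// /// let bytes = to_bytes([&b"hello"[..], b"world", b"!"]).unwrap(); /// let dec = Decoder::parse(&bytes).unwrap(); /// assert_eq!(dec.decode(b"\0\xFF,\x01\x02").unwrap(), "hello,world!"); /// ```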
261 | #[cfg(feature = "serialize")] 262 | pub fn to_bytes<I>(symbols: I) -> Result<Vec<u8>> 263 | where 264 | I: IntoIterator, 265 | I::Item: AsRef<[u8]>, 266 | { 267 | let mut tbl = [0u64; 255]; 268 | let mut len_histo = [0u8; 8]; 269 | let mut prev_len_order = 0usize; 270 | let mut code = 0usize; 271 | for bytes in symbols { 272 | if code >= 0xFF { 273 | return Err(ErrorInner::CodeOverflow.into()); 274 | } 275 | let bytes = bytes.as_ref(); 276 | let len = bytes.len(); 277 | if !(1..=8).contains(&len) || bytes.contains(&0) { 278 | return Err(ErrorInner::InvalidSymbol.into()); 279 | } 280 | // 23456781 => 0123456MAX 281 | let len_order = len.wrapping_sub(2); 282 | if prev_len_order > len_order { 283 | return Err(ErrorInner::IncorrectSymbolOrder.into()); 284 | } 285 | prev_len_order = len_order; 286 | 287 | let mut sym = 0u64; 288 | sym.as_mut_bytes()[..len].copy_from_slice(bytes); 289 | tbl[code] = sym; 290 | code += 1; 291 | len_histo[len - 1] += 1; 292 | } 293 | 294 | let mut out = Vec::with_capacity(8 + 1 + 8 + MAX_SYMBOL_LEN * 255); 295 | // Magic bytes, with no parameters set. 296 | let magic = u64::from(VERSION) << 32 | 0xFF; 297 | out.extend_from_slice(&magic.to_le_bytes()); 298 | // Disable `zero_terminated` mode. 299 | out.push(0x00); 300 | // Lengths. 301 | out.extend_from_slice(&len_histo); 302 | 303 | for sym in &tbl[..code] { 304 | let len = Decoder::symbol_len(*sym); 305 | out.extend_from_slice(&sym.as_bytes()[..len]); 306 | } 307 | 308 | Ok(out) 309 | } 310 | 311 | #[cfg(test)] 312 | mod tests { 313 | use super::*; 314 | 315 | #[test] 316 | #[allow(clippy::print_stderr)] 317 | fn smoke() { 318 | let tbl = Decoder { 319 | symbols: [u64::from_ne_bytes(*b"hello\0\0\0"); 255], 320 | }; 321 | let debug = format!("{tbl:#?}"); 322 | eprintln!("{debug}"); 323 | assert!(debug.contains(r#"42: "hello","#)); 324 | 325 | assert_eq!(tbl.decode(b"").unwrap(), ""); 326 | assert_eq!( 327 | tbl.decode(b"\xFF").unwrap_err().to_string(), 328 | "invalid escape byte at the end of input", 329 | ); 330 | assert_eq!( 331 | tbl.decode_into(b"\0", &mut [0u8; 4]) 332 | .unwrap_err() 333 | .to_string(), 334 | "output buffer is too small", 335 | ); 336 | 337 | let got = tbl.decode(b"\0\xFF,\0").unwrap(); 338 | assert_eq!(got, "hello,hello"); 339 | } 340 | 341 | #[test] 342 | #[cfg(feature = "serialize")] 343 | fn serialize() { 344 | let bytes = to_bytes([&b"hello"[..], b"world", b"!"]).unwrap(); 345 | let tbl = Decoder::parse(&bytes).unwrap(); 346 | assert_eq!(tbl.decode(b"\0\xFF,\x01\x02").unwrap(), "hello,world!"); 347 | 348 | assert_eq!( 349 | to_bytes([&b"!"[..], b"hello"]).unwrap_err().to_string(), 350 | "symbols must be ordered in length 2,3,4,5,6,7,8,1", 351 | ); 352 | assert_eq!( 353 | to_bytes([b"123456789"]).unwrap_err().to_string(), 354 | "invalid symbol", 355 | ); 356 | assert_eq!( 357 | to_bytes(&[b"a"].repeat(256)).unwrap_err().to_string(), 358 | "too many symbols", 359 | ); 360 | } 361 | } 362 | -------------------------------------------------------------------------------- /dwarfs/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A library for reading [DwarFS][dwarfs] archives (aka. images). 2 | //! 3 | //! Currently, DwarFS filesystem versions 2.3..=2.5 are supported, 4 | //! which should be compatible with files generated by 5 | //! [upstream `mkdwarfs`][dwarfs] v0.5.0..=v0.12.4 (latest at the time of 6 | //! writing). Other versions may also be readable but are not guaranteed. 7 | //! 8 | //! [dwarfs]: https://github.com/mhx/dwarfs 9 | //!
10 | //! ``` 11 | //! use dwarfs::{Archive, ArchiveIndex, AsChunks}; 12 | //! use std::fs::File; 13 | //! 14 | //! # fn wrap() -> dwarfs::Result<()> { 15 | //! // Open an archive file and load its metadata. 16 | //! let file = File::open("./my.dwarfs")?; 17 | //! let (index, mut archive) = Archive::new(file)?; 18 | //! 19 | //! // Hierarchy traversal. 20 | //! for entry in index.root().entries() { 21 | //! let inode = entry.inode(); 22 | //! println!("/{} mode={}", entry.name(), inode.metadata().file_type_mode()); 23 | //! if let Some(deep) = inode.as_dir() { 24 | //! for entry in deep.entries() { 25 | //! // ... 26 | //! } 27 | //! } 28 | //! } 29 | //! 30 | //! // Resolve paths. 31 | //! let file: dwarfs::File = index.get_path(["src", "Cargo.toml"]) 32 | //! .expect("does not exist") 33 | //! .as_file() 34 | //! .expect("not a file"); 35 | //! // The simple way to read content. 36 | //! let bytes: Vec<u8> = file.read_to_vec(&mut archive)?; 37 | //! 38 | //! # Ok(()) } 39 | //! ``` 40 | //! 41 | //! ## Cargo features 42 | //! 43 | //! - `zstd`, `lzma`, `lz4` *(Only `zstd` is enabled by default)* 44 | //! 45 | //! Enable relevant decompression algorithm support. `zstd` is the default 46 | //! compression algorithm `mkdwarfs` uses and it should be enough for most cases. 47 | //! 48 | //! - `log` *(Enabled by default)* 49 | //! 50 | //! Enable trace-level logging and time measurement for internal events via 51 | //! the [`log` crate][log]. Useful for profiling or debugging. Should not 52 | //! have a performance penalty unless trace-level logging is enabled. 53 | //! 54 | //! - `serialize` *(Disabled by default)* 55 | //! 56 | //! Enable serialization support for various structures. It enables: 57 | //! - [`metadata::Schema::to_bytes`] 58 | //! - [`metadata::Metadata::to_schema_and_bytes`] 59 | //! - [`fsst::to_bytes`] 60 | //! 61 | //! [log]: https://crates.io/crates/log 62 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 63 | #![forbid(unsafe_code)] 64 | #![warn(missing_debug_implementations)] 65 | #![warn(missing_docs)] 66 | 67 | #[cfg(feature = "log")] 68 | #[macro_use(trace_time)] 69 | extern crate measure_time; 70 | 71 | #[cfg(feature = "log")] 72 | #[macro_use(trace)] 73 | extern crate log; 74 | 75 | #[cfg(not(feature = "log"))] 76 | #[macro_use] 77 | mod macros { 78 | macro_rules! trace { 79 | ($($tt:tt)*) => { 80 | let _ = if false { 81 | let _ = ::std::format_args!($($tt)*); 82 | }; 83 | }; 84 | } 85 | 86 | macro_rules! trace_time { 87 | ($($tt:tt)*) => { 88 | trace!($($tt)*) 89 | }; 90 | } 91 | } 92 | 93 | macro_rules! bail { 94 | ($err:expr $(,)?) => { 95 | return Err(Into::into($err)) 96 | }; 97 | } 98 | 99 | pub mod archive; 100 | pub mod fsst; 101 | pub mod metadata; 102 | pub mod section; 103 | 104 | pub extern crate positioned_io; 105 | pub extern crate zerocopy; 106 | 107 | /// The range of filesystem version tuple `(major, minor)` supported by this library. 108 | /// 109 | /// Currently this is `(2, 3)..=(2, 5)`. 110 | // TODO: We could lower this. 111 | pub const SUPPORTED_VERSION_RANGE: std::ops::RangeInclusive<(u8, u8)> = (2, 3)..=(2, 5); 112 | 113 | #[doc(inline)] 114 | pub use archive::{ 115 | Archive, ArchiveIndex, AsChunks, Device, Dir, DirEntry, Error, File, Inode, InodeKind, 116 | InodeMetadata, Ipc, Result, Symlink, 117 | }; 118 | -------------------------------------------------------------------------------- /dwarfs/src/metadata.rs: -------------------------------------------------------------------------------- 1 | //! The low-level metadata structures and parsers.
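//! //! A typical decode flow, assuming `schema_payload` and `metadata_payload` (hypothetical names, for illustration) hold the already-read and decompressed payloads of the two metadata sections: //! //! ```ignore //! use dwarfs::metadata::{Metadata, Schema}; //! let schema = Schema::parse(&schema_payload)?; //! let metadata = Metadata::parse(&schema, &metadata_payload)?; //! ```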
2 | //! 3 | //! The parsed [`Metadata`] and [`Schema`] are given as-is from the underlying 4 | //! structure without additional modification. Notably, for `Metadata`, no 5 | //! unpacking is performed, no value validation is performed, and only binary 6 | //! syntax and structure are validated. 7 | //! 8 | //! For high-level access of the image hierarchy and content, use 9 | //! [`Archive`][crate::Archive] instead. 10 | //! 11 | //! See the upstream documentation for the meaning of structs and fields: 12 | //! 13 | //! - Metadata definition: 14 | //! 15 | //! - Frozen schema definition: 16 | //! 17 | //! Typically, users should treat [`Schema`] as an opaque type, because the 18 | //! definition in this crate is specialized only for [`Metadata::parse`]. 19 | use std::{borrow::Borrow, fmt, marker::PhantomData, ops}; 20 | 21 | use bstr::BString; 22 | use serde::{Deserialize, Serialize, de}; 23 | 24 | mod de_frozen; 25 | mod de_thrift; 26 | 27 | #[cfg(feature = "serialize")] 28 | mod ser_frozen; 29 | #[cfg(feature = "serialize")] 30 | mod ser_thrift; 31 | 32 | #[cfg(test)] 33 | mod tests; 34 | 35 | type Result<T, E = Error> = std::result::Result<T, E>; 36 | 37 | /// An error raised from parsing schema or metadata. 38 | #[derive(Debug)] 39 | pub struct Error(Box<str>); 40 | 41 | impl fmt::Display for Error { 42 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 43 | self.0.fmt(f) 44 | } 45 | } 46 | 47 | impl std::error::Error for Error {} 48 | 49 | /// A dense map of i16 -> T, stored as `Vec<Option<T>>` for quick indexing. 50 | #[derive(Default, Clone, PartialEq, Eq, Hash)] 51 | pub struct DenseMap<T>(pub Vec<Option<T>>); 52 | 53 | impl<T: fmt::Debug> fmt::Debug for DenseMap<T> { 54 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 55 | f.debug_map().entries(self.iter()).finish() 56 | } 57 | } 58 | 59 | impl<'de, T: de::Deserialize<'de>> de::Deserialize<'de> for DenseMap<T> { 60 | fn deserialize<D: de::Deserializer<'de>>(de: D) -> Result<Self, D::Error> { 61 | struct Visitor<T>(PhantomData<T>); 62 | 63 | impl<'de, T: de::Deserialize<'de>> de::Visitor<'de> for Visitor<T> { 64 | type Value = DenseMap<T>; 65 | 66 | fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { 67 | f.write_str("a dense map") 68 | } 69 | 70 | fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> 71 | where 72 | A: de::MapAccess<'de>, 73 | { 74 | // Keys start at 1. 75 | let len = map.size_hint().unwrap_or(0) + 1; 76 | let mut vecmap = Vec::with_capacity(len); 77 | while let Some((k, v)) = map.next_entry::<i16, T>()? { 78 | let k = usize::try_from(k).map_err(|_| { 79 | de::Error::invalid_value( 80 | de::Unexpected::Signed(k.into()), 81 | &"an unsigned dense map key", 82 | ) 83 | })?; 84 | if vecmap.len() <= k { 85 | vecmap.resize_with(k + 1, || None); 86 | } 87 | vecmap[k] = Some(v); 88 | } 89 | Ok(DenseMap(vecmap)) 90 | } 91 | } 92 | 93 | de.deserialize_map(Visitor::<T>(PhantomData)) 94 | } 95 | } 96 | 97 | impl<T: Serialize> Serialize for DenseMap<T> { 98 | fn serialize<S>(&self, ser: S) -> Result<S::Ok, S::Error> 99 | where 100 | S: serde::Serializer, 101 | { 102 | use serde::ser::SerializeMap; 103 | 104 | let size = self.iter().count(); 105 | let mut ser = ser.serialize_map(Some(size))?; 106 | for (k, v) in self.iter() { 107 | ser.serialize_entry(&k, v)?; 108 | } 109 | ser.end() 110 | } 111 | } 112 | 113 | impl<T> ops::Index<i16> for DenseMap<T> { 114 | type Output = T; 115 | 116 | fn index(&self, index: i16) -> &Self::Output { 117 | self.get(index).expect("index out of bounds") 118 | } 119 | } 120 | 121 | impl<T> DenseMap<T> { 122 | fn is_empty(&self) -> bool { 123 | self.0.is_empty() 124 | } 125 | 126 | fn get(&self, i: i16) -> Option<&T> { 127 | self.0.get(usize::try_from(i).ok()?)?.as_ref() 128 | } 129 | 130 | fn iter(&self) -> impl Iterator<Item = (i16, &T)> + use<'_, T> { 131 | self.0 132 | .iter() 133 | .enumerate() 134 | .filter_map(|(k, v)| Some((k as i16, v.as_ref()?))) 135 | } 136 | } 137 | 138 | /// The Frozen schema. You should treat this type as opaque. 139 | /// 140 | /// See [module level documentation][self] for details. 141 | #[expect(missing_docs, reason = "users should check upstream docs")] 142 | #[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize, Serialize)] 143 | #[non_exhaustive] 144 | pub struct Schema { 145 | // NB. Field order matters for ser/de impl. 146 | #[serde(default, skip_serializing_if = "is_default")] 147 | pub relax_type_checks: bool, 148 | pub layouts: DenseMap<SchemaLayout>, 149 | #[serde(default, skip_serializing_if = "is_default")] 150 | pub root_layout: i16, 151 | #[serde(default, skip_serializing_if = "is_default")] 152 | pub file_version: i32, 153 | } 154 | 155 | /// You should treat this type as opaque. 156 | /// 157 | /// See [module level documentation][self] for details. 158 | #[expect(missing_docs, reason = "users should check upstream docs")] 159 | #[derive(Debug, Default, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] 160 | #[non_exhaustive] 161 | pub struct SchemaLayout { 162 | // NB. Field order matters for ser/de impl. 163 | #[serde(default, skip_serializing_if = "is_default")] 164 | pub size: i32, 165 | #[serde(default, skip_serializing_if = "is_default")] 166 | pub bits: i16, 167 | pub fields: DenseMap<SchemaField>, 168 | pub type_name: String, 169 | } 170 | 171 | fn is_default<T: Default + PartialEq>(v: &T) -> bool { 172 | *v == T::default() 173 | } 174 | 175 | /// You should treat this type as opaque. 176 | /// 177 | /// See [module level documentation][self] for details. 178 | #[expect(missing_docs, reason = "users should check upstream docs")] 179 | #[derive(Debug, Default, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] 180 | #[non_exhaustive] 181 | pub struct SchemaField { 182 | // NB. Field order matters for ser/de impl.
pub layout_id: i16, 184 | #[serde(default, skip_serializing_if = "is_default")] 185 | pub offset: i16, 186 | } 187 | 188 | impl SchemaField { 189 | fn offset_bits(&self) -> u16 { 190 | let o = self.offset; 191 | if o >= 0 { o as u16 * 8 } else { (-o) as u16 } 192 | } 193 | } 194 | 195 | impl Schema { 196 | /// Parse the schema from the on-disk serialized form 197 | /// ([`SectionType::METADATA_V2_SCHEMA`](crate::section::SectionType::METADATA_V2_SCHEMA)). 198 | /// 199 | /// The schema type and parser are specialized for [`Metadata::parse`]. It 200 | /// should not be used for Frozen schemas of other data structures. 201 | /// 202 | /// # Errors 203 | /// 204 | /// Returns `Err` if it fails to parse the input, or if the parsed result fails 205 | /// basic invariant validation. Currently only index ranges are checked; 206 | /// the validated invariants may change in the future. 207 | pub fn parse(input: &[u8]) -> Result<Self> { 208 | let this = de_thrift::deserialize_struct::<Self>(input) 209 | .map_err(|err| Error(format!("failed to parse schema: {err}").into()))?; 210 | this.validate()?; 211 | Ok(this) 212 | } 213 | 214 | /// Serialize the schema to on-disk bytes; this is the reverse of [`Schema::parse`]. 215 | /// 216 | /// The serialization format is not canonical and the result may change 217 | /// between versions of this library. It is not considered a breaking 218 | /// change but a minor change. 219 | /// 220 | /// # Properties 221 | /// 222 | /// - If `schema1 == schema2`, then `schema1.to_bytes()? == schema2.to_bytes()?` 223 | /// 224 | /// - `Schema::parse(schema.to_bytes()?)? == schema` 225 | /// 226 | /// - `Schema::parse(bytes)?.to_bytes() == bytes` may *NOT* hold. 227 | /// 228 | /// # Errors 229 | /// 230 | /// Returns `Err` if serialization fails. Currently this can happen on 231 | /// overly large collections whose length exceeds `i32::MAX`.
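/// /// # Example /// /// A round-trip sketch of the properties above, where `schema_bytes` is assumed to be a decompressed `METADATA_V2_SCHEMA` payload: /// /// ```ignore /// let schema = Schema::parse(&schema_bytes)?; /// let bytes = schema.to_bytes()?; /// assert_eq!(Schema::parse(&bytes)?, schema); /// ```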
232 | #[cfg(feature = "serialize")] 233 | pub fn to_bytes(&self) -> Result<Vec<u8>> { 234 | ser_thrift::serialize_struct(self) 235 | .map_err(|err| Error(format!("failed to serialize schema: {err}").into())) 236 | } 237 | 238 | fn validate(&self) -> Result<()> { 239 | self.validate_inner() 240 | .map_err(|msg| Error(msg.into_boxed_str())) 241 | } 242 | 243 | fn validate_inner(&self) -> Result<(), String> { 244 | const FILE_VERSION: i32 = 1; 245 | 246 | if self.file_version != FILE_VERSION { 247 | bail!(format!( 248 | "unsupported schema file_version {:?}", 249 | self.file_version 250 | )); 251 | } 252 | if self.layouts.get(self.root_layout).is_none() { 253 | bail!("missing root_layout"); 254 | } 255 | 256 | for (layout_id, layout) in self.layouts.iter() { 257 | if layout.fields.is_empty() && layout.bits > 64 { 258 | bail!(format!( 259 | "layout {}: primitive type is too large to have {} bits", 260 | layout_id, layout.bits, 261 | )); 262 | } 263 | 264 | for (field_id, field) in layout.fields.iter() { 265 | (|| -> Result<(), &str> { 266 | let field_layout = self 267 | .layouts 268 | .get(field.layout_id) 269 | .ok_or("layout index out of range")?; 270 | let bit_offset = if field.offset >= 0 { 271 | field.offset.checked_mul(8) 272 | } else { 273 | field.offset.checked_neg() 274 | }; 275 | if field_layout.bits < 0 { 276 | bail!("layout bits cannot be negative"); 277 | } 278 | let bit_total_size = bit_offset 279 | .and_then(|off| (off as u16).checked_add(field_layout.bits as u16)); 280 | bit_total_size.ok_or("offset overflows")?; 281 | Ok(()) 282 | })() 283 | .map_err(|err| format!("field {field_id} of layout {layout_id}: {err}"))?; 284 | } 285 | } 286 | 287 | Ok(()) 288 | } 289 | } 290 | 291 | /// A wrapper of a `Vec<T>` representing an ordered set of ascending `T`. 292 | #[derive(Default, Clone, PartialEq, Deserialize, Serialize)] 293 | #[serde(transparent)] 294 | pub struct OrderedSet<T>(pub Vec<T>); 295 | 296 | impl<T: fmt::Debug> fmt::Debug for OrderedSet<T> { 297 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 298 | f.debug_set().entries(&self.0).finish() 299 | } 300 | } 301 | 302 | impl<T> OrderedSet<T> { 303 | /// Returns the number of elements in the underlying `Vec`. 304 | #[must_use] 305 | #[inline] 306 | pub fn len(&self) -> usize { 307 | self.0.len() 308 | } 309 | 310 | /// Returns true if the underlying `Vec` contains no elements. 311 | #[must_use] 312 | #[inline] 313 | pub fn is_empty(&self) -> bool { 314 | self.0.is_empty() 315 | } 316 | 317 | /// Returns true if the underlying `Vec` contains `value`. 318 | /// 319 | /// This uses binary search and the underlying `Vec` must be sorted by 320 | /// ascending `T`; otherwise, it will return an unspecified result but will 321 | /// not panic. 322 | #[must_use] 323 | pub fn contains<Q>(&self, value: &Q) -> bool 324 | where 325 | T: Borrow<Q> + Ord, 326 | Q: Ord + ?Sized, 327 | { 328 | self.0 329 | .binary_search_by(|probe| Ord::cmp(probe.borrow(), value)) 330 | .is_ok() 331 | } 332 | } 333 | 334 | /// A wrapper of a `Vec<(K, V)>` representing an ordered map of ascending key `K`.
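/// /// For illustration, [`OrderedMap::get`] binary-searches the sorted pairs: /// /// ``` /// use dwarfs::metadata::OrderedMap; /// /// let map = OrderedMap(vec![(1u32, "a"), (3, "b"), (7, "c")]); /// assert_eq!(map.get(&3), Some(&"b")); /// assert_eq!(map.get(&4), None); /// ```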
335 | #[derive(Default, Clone, PartialEq)] 336 | pub struct OrderedMap<K, V>(pub Vec<(K, V)>); 337 | 338 | impl<K: fmt::Debug, V: fmt::Debug> fmt::Debug for OrderedMap<K, V> { 339 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 340 | f.debug_map() 341 | .entries(self.0.iter().map(|(k, v)| (k, v))) 342 | .finish() 343 | } 344 | } 345 | 346 | impl<'de, K: Deserialize<'de>, V: Deserialize<'de>> Deserialize<'de> for OrderedMap<K, V> { 347 | fn deserialize<D>(de: D) -> std::result::Result<Self, D::Error> 348 | where 349 | D: de::Deserializer<'de>, 350 | { 351 | struct Visitor<K, V>(PhantomData<(K, V)>); 352 | 353 | impl<'de, K: Deserialize<'de>, V: Deserialize<'de>> de::Visitor<'de> for Visitor<K, V> { 354 | type Value = OrderedMap<K, V>; 355 | 356 | fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { 357 | f.write_str("a map") 358 | } 359 | 360 | fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> 361 | where 362 | A: de::MapAccess<'de>, 363 | { 364 | let mut v = Vec::with_capacity(map.size_hint().unwrap_or(0)); 365 | while let Some(pair) = map.next_entry()? { 366 | v.push(pair); 367 | } 368 | Ok(OrderedMap(v)) 369 | } 370 | } 371 | 372 | de.deserialize_map(Visitor::<K, V>(PhantomData)) 373 | } 374 | } 375 | 376 | impl<K: Serialize, V: Serialize> Serialize for OrderedMap<K, V> { 377 | fn serialize<S>(&self, ser: S) -> Result<S::Ok, S::Error> 378 | where 379 | S: serde::Serializer, 380 | { 381 | ser.collect_map(self.0.iter().map(|(k, v)| (k, v))) 382 | } 383 | } 384 | 385 | impl<K, V> OrderedMap<K, V> { 386 | /// Returns the number of elements in the underlying `Vec`. 387 | #[must_use] 388 | #[inline] 389 | pub fn len(&self) -> usize { 390 | self.0.len() 391 | } 392 | 393 | /// Returns true if the underlying `Vec` contains no elements. 394 | #[must_use] 395 | #[inline] 396 | pub fn is_empty(&self) -> bool { 397 | self.0.is_empty() 398 | } 399 | 400 | /// Search and get the `value` corresponding to `key` in the map. 401 | /// 402 | /// This uses binary search and the underlying `Vec` must be sorted by 403 | /// ascending `K`; otherwise, it will return an unspecified result but will 404 | /// not panic. 405 | #[must_use] 406 | pub fn get<Q>(&self, key: &Q) -> Option<&V> 407 | where 408 | K: Borrow<Q> + Ord, 409 | Q: Ord + ?Sized, 410 | { 411 | let i = self 412 | .0 413 | .binary_search_by(|(probe, _)| Ord::cmp(probe.borrow(), key)) 414 | .ok()?; 415 | Some(&self.0[i].1) 416 | } 417 | } 418 | 419 | impl Metadata { 420 | /// Parse the metadata from on-disk serialized form 421 | /// ([`SectionType::METADATA_V2`](crate::section::SectionType::METADATA_V2)), 422 | /// using layout defined by the given schema. 423 | /// 424 | /// # Errors 425 | /// 426 | /// Returns `Err` if it fails to deserialize. This can happen on invalid input 427 | /// bytes, an invalid `schema`, length overflows, etc. 428 | /// 429 | /// Since only structures but not values are checked, this method may 430 | /// optimistically accept some "semantically invalid" `Metadata`. 431 | /// The tolerance on invalid parts may change in the future. 432 | pub fn parse(schema: &Schema, bytes: &[u8]) -> Result<Self> { 433 | de_frozen::deserialize(schema, bytes) 434 | .map_err(|err| Error(format!("failed to parse metadata: {err}").into())) 435 | } 436 | 437 | /// Serialize the metadata to on-disk bytes; this is the reverse of [`Metadata::parse`]. 438 | /// 439 | /// The serialization format is not canonical and the result may change 440 | /// between versions of this library. It is not considered a breaking 441 | /// change but a minor change. 442 | /// 443 | /// # Properties 444 | /// 445 | /// - If `meta1 == meta2`, then `meta1.to_schema_and_bytes()? 

418 | 
419 | impl Metadata {
420 |     /// Parse the metadata from on-disk serialized form
421 |     /// ([`SectionType::METADATA_V2`](crate::section::SectionType::METADATA_V2)),
422 |     /// using the layout defined by the given schema.
423 |     ///
424 |     /// # Errors
425 |     ///
426 |     /// Returns `Err` if deserialization fails. This can happen on invalid
427 |     /// input bytes, an invalid `schema`, length overflows, etc.
428 |     ///
429 |     /// Since only structures but not values are checked, this method may
430 |     /// optimistically accept some "semantically invalid" `Metadata`.
431 |     /// The tolerance on invalid parts may change in the future.
432 |     pub fn parse(schema: &Schema, bytes: &[u8]) -> Result<Self> {
433 |         de_frozen::deserialize(schema, bytes)
434 |             .map_err(|err| Error(format!("failed to parse metadata: {err}").into()))
435 |     }
436 | 
437 |     /// Serialize the metadata to on-disk bytes; does the reverse of [`Metadata::parse`].
438 |     ///
439 |     /// The serialization format is not canonical and the result may change
440 |     /// between versions of this library. Such a change is not considered a
441 |     /// breaking change but a minor one.
442 |     ///
443 |     /// # Properties
444 |     ///
445 |     /// - If `meta1 == meta2`, then `meta1.to_schema_and_bytes()? == meta2.to_schema_and_bytes()?`.
446 |     ///
447 |     /// - `let (schema, bytes) = meta.to_schema_and_bytes()?; meta == Metadata::parse(&schema, &bytes)?`
448 |     ///
449 |     /// # Errors
450 |     ///
451 |     /// Returns `Err` if serialization fails. Currently this can happen on
452 |     /// overly large collections whose length exceeds `i32::MAX`.
453 |     #[cfg(feature = "serialize")]
454 |     pub fn to_schema_and_bytes(&self) -> Result<(Schema, Vec<u8>)> {
455 |         ser_frozen::serialize_struct(self)
456 |             .map_err(|err| Error(format!("failed to serialize metadata: {err}").into()))
457 |     }
458 | }
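
// --- Added example (not part of the upstream sources): metadata round-trip ---
// A sketch of the round-trip property documented on `to_schema_and_bytes`,
// gated on the `serialize` feature. `Metadata::default()` is an assumption
// standing in for a real parsed metadata value.
#[cfg(all(test, feature = "serialize"))]
mod metadata_roundtrip_example {
    use super::Metadata;

    #[test]
    fn roundtrip() {
        let meta = Metadata::default();
        let (schema, bytes) = meta.to_schema_and_bytes().unwrap();
        assert_eq!(meta, Metadata::parse(&schema, &bytes).unwrap());
    }
}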

459 | 
460 | /// See [module level documentation][self] for details.
461 | #[expect(missing_docs, reason = "users should check upstream docs")]
462 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
463 | #[non_exhaustive]
464 | #[serde(default)]
465 | pub struct Metadata {
466 |     // NB. Field order matters for ser/de impl.
467 |     // #1
468 |     pub chunks: Vec<Chunk>,
469 |     pub directories: Vec<Directory>,
470 |     pub inodes: Vec<InodeData>,
471 |     pub chunk_table: Vec<u32>,
472 |     #[deprecated = "deprecated since DwarFS 2.3"]
473 |     pub entry_table: Vec<u32>,
474 |     pub symlink_table: Vec<u32>,
475 |     pub uids: Vec<u32>,
476 |     pub gids: Vec<u32>,
477 |     pub modes: Vec<u32>,
478 |     pub names: Vec<BString>,
479 |     pub symlinks: Vec<BString>,
480 |     pub timestamp_base: u64,
481 | 
482 |     // #13
483 |     pub chunk_inode_offset: u32,
484 |     pub link_inode_offset: u32,
485 | 
486 |     // #15
487 |     pub block_size: u32,
488 |     pub total_fs_size: u64,
489 | 
490 |     // #17
491 |     pub devices: Option<Vec<u64>>,
492 |     pub options: Option<FsOptions>,
493 | 
494 |     // #19
495 |     pub dir_entries: Option<Vec<DirEntry>>,
496 |     pub shared_files_table: Option<Vec<u32>>,
497 |     pub total_hardlink_size: Option<u64>,
498 |     pub dwarfs_version: Option<BString>,
499 |     pub create_timestamp: Option<u64>,
500 |     pub compact_names: Option<StringTable>,
501 |     pub compact_symlinks: Option<StringTable>,
502 | 
503 |     // #26
504 |     pub preferred_path_separator: Option<u32>,
505 |     pub features: Option<OrderedSet<BString>>,
506 |     pub category_names: Option<Vec<BString>>,
507 |     pub block_categories: Option<Vec<u32>>,
508 |     pub reg_file_size_cache: Option<InodeSizeCache>,
509 | }
510 | 
511 | /// See [module level documentation][self] for details.
512 | #[expect(missing_docs, reason = "users should check upstream docs")]
513 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
514 | #[non_exhaustive]
515 | #[serde(default)]
516 | pub struct Chunk {
517 |     // NB. Field order matters for ser/de impl.
518 |     pub block: u32,
519 |     pub offset: u32,
520 |     pub size: u32,
521 | }
522 | 
523 | /// See [module level documentation][self] for details.
524 | #[expect(missing_docs, reason = "users should check upstream docs")]
525 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
526 | #[non_exhaustive]
527 | #[serde(default)]
528 | pub struct Directory {
529 |     // NB. Field order matters for ser/de impl.
530 |     pub parent_entry: u32,
531 |     pub first_entry: u32,
532 |     pub self_entry: u32,
533 | }
534 | 
535 | /// See [module level documentation][self] for details.
536 | #[expect(missing_docs, reason = "users should check upstream docs")]
537 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
538 | #[non_exhaustive]
539 | #[serde(default)]
540 | pub struct InodeData {
541 |     // NB. Field order matters for ser/de impl.
542 |     #[deprecated = "deprecated since DwarFS 2.3"]
543 |     pub name_index: u32,
544 |     pub mode_index: u32,
545 |     #[deprecated = "deprecated since DwarFS 2.3"]
546 |     pub inode: u32,
547 |     pub owner_index: u32,
548 |     pub group_index: u32,
549 |     pub atime_offset: u32,
550 |     pub mtime_offset: u32,
551 |     pub ctime_offset: u32,
552 | }
553 | 
554 | /// See [module level documentation][self] for details.
555 | #[expect(missing_docs, reason = "users should check upstream docs")]
556 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
557 | #[non_exhaustive]
558 | #[serde(default)]
559 | pub struct DirEntry {
560 |     // NB. Field order matters for ser/de impl.
561 |     pub name_index: u32,
562 |     pub inode_num: u32,
563 | }
564 | 
565 | /// See [module level documentation][self] for details.
566 | #[expect(missing_docs, reason = "users should check upstream docs")]
567 | #[expect(clippy::struct_excessive_bools, reason = "follows upstream")]
568 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
569 | #[non_exhaustive]
570 | #[serde(default)]
571 | pub struct FsOptions {
572 |     // NB. Field order matters for ser/de impl.
573 |     pub mtime_only: bool,
574 |     pub time_resolution_sec: Option<u32>,
575 |     pub packed_chunk_table: bool,
576 |     pub packed_directories: bool,
577 |     pub packed_shared_files_table: bool,
578 | }
579 | 
580 | /// See [module level documentation][self] for details.
581 | #[expect(missing_docs, reason = "users should check upstream docs")]
582 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
583 | #[non_exhaustive]
584 | #[serde(default)]
585 | pub struct StringTable {
586 |     // NB. Field order matters for ser/de impl.
587 |     pub buffer: BString,
588 |     pub symtab: Option<BString>,
589 |     pub index: Vec<u32>,
590 |     pub packed_index: bool,
591 | }
592 | 
593 | /// See [module level documentation][self] for details.
594 | #[expect(missing_docs, reason = "users should check upstream docs")]
595 | #[derive(Default, Debug, Clone, PartialEq, Deserialize, Serialize)]
596 | #[non_exhaustive]
597 | #[serde(default)]
598 | pub struct InodeSizeCache {
599 |     // NB. Field order matters for ser/de impl.
600 |     pub lookup: OrderedMap<u32, u64>,
601 |     pub min_chunk_count: u64,
602 | }
603 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/de_frozen.rs:
--------------------------------------------------------------------------------
1 | //! fbthrift's Frozen2 format, a bit-compressed compact format that has
2 | //! nothing to do with Thrift.
3 | //!
4 | //! Here we use serde for structure deserialization. The metadata is expected to
5 | //! be fully unpacked in memory for performance, thus it makes little sense to
6 | //! parse it lazily, which would only invite more trouble from invalid unread data.
7 | //!
8 | //! Source:
9 | //!
10 | //! There is almost no documentation about this format. The details are mostly from:
11 | //! - Help from Marcus Holland-Moritz, who wrote some explanations
12 | //!   and examples:
13 | //!
14 | //!
15 | //! - Me (oxalica) reverse-engineering byte layouts in the DwarFS metadata block, and
16 | //!   comparing with the metadata dump from:
17 | //!   `dwarfsck $imgfile -d metadata_full_dump`
18 | 
19 | use serde::{de, forward_to_deserialize_any};
20 | 
21 | use super::{Schema, SchemaLayout};
22 | 
23 | type Result<T, E = Error> = std::result::Result<T, E>;
24 | type Error = de::value::Error;
25 | 
26 | /// The offset type we use to index into metadata bytes.
27 | ///
28 | /// We expect metadata to be relatively small compared to the actual data, and
29 | /// it's efficiently bit-packed. Assume 4GiB is enough for it.
30 | pub(crate) type Offset = u32;
31 | 
32 | // Assert that offset -> usize never overflows.
33 | fn to_usize(offset: Offset) -> usize {
34 |     const _: () = assert!(size_of::<Offset>() <= size_of::<usize>());
35 |     offset as usize
36 | }
37 | 
38 | pub(crate) fn deserialize<'de, T: de::Deserialize<'de>>(schema: &'de Schema, bytes: &'de [u8]) -> Result<T> {
39 |     let root_layout = schema.layouts.get(schema.root_layout).expect("validated");
40 |     let de = Deserializer {
41 |         src: &Source { schema, bytes },
42 |         layout: Some(root_layout),
43 |         bit_offset: 0,
44 |         storage_start: 0,
45 |     };
46 |     T::deserialize(de)
47 | }
48 | 
49 | /// The input raw bytes with attached schema.
50 | #[derive(Clone, Copy)]
51 | struct Source<'a> {
52 |     schema: &'a Schema,
53 |     bytes: &'a [u8],
54 | }
55 | 
56 | impl Source<'_> {
57 |     /// Load 1 bit at `base_bit`, using little-endian order.
58 |     ///
59 |     /// This assumes the input is in bounds. Validation should be done on structs.
60 |     fn load_bit(&self, base_bit: Offset) -> Result<bool> {
61 |         let (byte_idx, bit_idx) = (to_usize(base_bit) / 8, base_bit % 8);
62 |         let b = *self
63 |             .bytes
64 |             .get(byte_idx)
65 |             .ok_or_else(|| de::Error::custom("bit location overflow"))?;
66 |         Ok((b >> bit_idx) & 1 != 0)
67 |     }
68 | 
69 |     /// Load `bits` bits starting at `base_bit`, using little-endian order,
70 |     /// filling upper bits with 0.
71 |     ///
72 |     /// This assumes the input is in bounds. Validation should be done on structs.
73 |     fn load_bits(&self, base_bit: Offset, bits: u16) -> Result<u64> {
74 |         // Already checked by schema validation.
75 |         debug_assert!(bits > 0);
76 |         debug_assert!(bits <= 64);
77 |         let (byte_idx, bit_start) = (to_usize(base_bit) / 8, base_bit as u16 % 8);
78 |         let last_byte_idx = (base_bit + Offset::from(bits) - 1) / 8;
79 |         if to_usize(last_byte_idx) >= self.bytes.len() {
80 |             return Err(de::Error::custom("bits location overflow"));
81 |         }
82 | 
83 |         // Always load an 8-byte chunk for performance.
84 |         let rest = &self.bytes[byte_idx..];
85 |         let x = if rest.len() >= 8 {
86 |             u64::from_le_bytes(rest[..8].try_into().unwrap())
87 |         } else {
88 |             let mut buf = [0u8; 8];
89 |             buf[..rest.len()].copy_from_slice(rest);
90 |             u64::from_le_bytes(buf)
91 |         };
92 | 
93 |         let start_and_bits = bit_start + bits;
94 |         Ok(if start_and_bits <= 64 {
95 |             // Simple case:
96 |             // Bit | 63, 62, ...          1, 0 |
97 |             //     | up_bits | bits | bit_start |
98 |             //                ~~~~~~ target
99 |             x << (64 - start_and_bits) >> (64 - bits)
100 |         } else {
101 |             // Overshooting case:
102 |             // Bit | 71 .. 64 | 63, 62, ...  1, 0 |
103 |             //     |     | bits    | bit_start    |
104 |             //       ~~~~~~~~~~~~~~~ target
105 | 
106 |             // We need the 9-th (idx=8) byte. This can only happen if bits >= 56.
107 |             let overshooting_bits = start_and_bits & 63;
108 |             let hi = u64::from(rest[8]);
109 |             x >> bit_start | hi << (64 - overshooting_bits) >> (64 - bits)
110 |         })
111 |     }
112 | }
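
// --- Added example (not part of the upstream sources): `load_bits` arithmetic ---
// A small worked check of the little-endian bit extraction above. With bytes
// [0b1011_0100, 0b0000_0001], reading 4 bits at bit 2 takes bits 2..6 of the
// first byte: (0xB4 >> 2) & 0xF = 0b1101 = 13. The `Schema` literal is an
// assumption; its field names follow the in-crate tests.
#[cfg(test)]
mod load_bits_example {
    use super::*;
    use super::super::DenseMap;

    #[test]
    fn extracts_le_bit_ranges() {
        // `load_bits` never looks at the schema, so any schema will do here.
        let schema = Schema {
            relax_type_checks: true,
            layouts: DenseMap::default(),
            root_layout: 0,
            file_version: 1,
        };
        let src = Source {
            schema: &schema,
            bytes: &[0b1011_0100, 0b0000_0001],
        };
        assert_eq!(src.load_bits(2, 4).unwrap(), 0b1101);
        // A range crossing the byte boundary: bits 6..10 are the top two bits
        // of byte 0 plus the low two bits of byte 1 = 0b0110.
        assert_eq!(src.load_bits(6, 4).unwrap(), 0b0110);
    }
}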

113 | 
114 | #[derive(Clone, Copy)]
115 | struct Deserializer<'a, 'de> {
116 |     src: &'a Source<'de>,
117 |     layout: Option<&'de SchemaLayout>,
118 |     bit_offset: Offset,
119 |     storage_start: Offset,
120 | }
121 | 
122 | impl<'de> Deserializer<'_, 'de> {
123 |     fn field_deserializer(&self, i: i16) -> Self {
124 |         let (layout, offset_bits) = if let Some(field) = self.layout.and_then(|l| l.fields.get(i)) {
125 |             (
126 |                 self.src.schema.layouts.get(field.layout_id),
127 |                 field.offset_bits(),
128 |             )
129 |         } else {
130 |             (None, 0)
131 |         };
132 |         Self {
133 |             src: self.src,
134 |             layout,
135 |             bit_offset: self.bit_offset + Offset::from(offset_bits),
136 |             storage_start: self.storage_start,
137 |         }
138 |     }
139 | 
140 |     fn deserialize_field<T: de::Deserialize<'de>>(&self, i: i16) -> Result<T> {
141 |         de::Deserialize::deserialize(self.field_deserializer(i))
142 |     }
143 | }
144 | 
145 | impl<'de> de::Deserializer<'de> for Deserializer<'_, 'de> {
146 |     type Error = Error;
147 | 
148 |     fn is_human_readable(&self) -> bool {
149 |         false
150 |     }
151 | 
152 |     fn deserialize_any<V>(self, _visitor: V) -> Result<V::Value>
153 |     where
154 |         V: de::Visitor<'de>,
155 |     {
156 |         // Not used.
157 |         unimplemented!()
158 |     }
159 | 
160 |     fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
161 |     where
162 |         V: de::Visitor<'de>,
163 |     {
164 |         let b = self.layout.is_some()
165 |             && self
166 |                 .src
167 |                 .load_bit(self.storage_start * 8 + self.bit_offset)?;
168 |         visitor.visit_bool(b)
169 |     }
170 | 
171 |     fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
172 |     where
173 |         V: de::Visitor<'de>,
174 |     {
175 |         self.deserialize_u64(visitor)
176 |     }
177 | 
178 |     fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
179 |     where
180 |         V: de::Visitor<'de>,
181 |     {
182 |         let Some(layout) = self.layout else {
183 |             return visitor.visit_u64(0);
184 |         };
185 |         if !layout.fields.is_empty() {
186 |             return Err(de::Error::invalid_type(
187 |                 de::Unexpected::Other("a schema layout with some fields"),
188 |                 &"an unsigned integer",
189 |             ));
190 |         }
191 |         let bits = layout.bits;
192 |         if !(0..=64).contains(&bits) {
193 |             return Err(de::Error::custom("too many bits for an unsigned int"));
194 |         }
195 |         visitor.visit_u64(
196 |             self.src
197 |                 .load_bits(self.storage_start * 8 + self.bit_offset, bits as u16)?,
198 |         )
199 |     }
200 | 
201 |     fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
202 |     where
203 |         V: de::Visitor<'de>,
204 |     {
205 |         self.deserialize_bytes(visitor)
206 |     }
207 | 
208 |     fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
209 |     where
210 |         V: de::Visitor<'de>,
211 |     {
212 |         let distance = self.deserialize_field::<Offset>(1)?;
213 |         let len = self.deserialize_field::<Offset>(2)?;
214 | 
215 |         let content = (|| {
216 |             let start = self.storage_start.checked_add(distance)?;
217 |             let end = start.checked_add(len)?;
218 |             self.src
219 |                 .bytes
220 |                 .get(usize::try_from(start).ok()?..usize::try_from(end).ok()?)
221 |         })()
222 |         .ok_or_else(|| <Error as de::Error>::custom("string offset or length overflow"))?;
223 |         visitor.visit_borrowed_bytes(content)
224 |     }
225 | 
226 |     fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
227 |     where
228 |         V: de::Visitor<'de>,
229 |     {
230 |         let distance = self.deserialize_field::<Offset>(1)?;
231 |         let len = self.deserialize_field::<u32>(2)?;
232 |         let elem_layout = self.layout.and_then(|l| {
233 |             let id = l.fields.get(3)?.layout_id;
234 |             Some(self.src.schema.layouts.get(id).expect("validated"))
235 |         });
236 |         visitor.visit_seq(CollectionDeserializer {
237 |             elem_de: Self {
238 |                 src: self.src,
239 |                 layout: elem_layout,
240 |                 bit_offset: 0,
241 |                 storage_start: self.storage_start + distance,
242 |             },
243 |             len,
244 |         })
245 |     }
246 | 
247 |     fn deserialize_map<V>(self, visitor: V) -> Result<V::Value>
248 |     where
249 |         V: de::Visitor<'de>,
250 |     {
251 |         let distance = self.deserialize_field::<Offset>(1)?;
252 |         let len = self.deserialize_field::<u32>(2)?;
253 |         let elem_layout = self.layout.and_then(|l| {
254 |             let id = l.fields.get(3)?.layout_id;
255 |             Some(self.src.schema.layouts.get(id).expect("validated"))
256 |         });
257 |         visitor.visit_map(CollectionDeserializer {
258 |             elem_de: Self {
259 |                 src: self.src,
260 |                 layout: elem_layout,
261 |                 bit_offset: 0,
262 |                 storage_start: self.storage_start + distance,
263 |             },
264 |             len,
265 |         })
266 |     }
267 | 
268 |     fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
269 |     where
270 |         V: de::Visitor<'de>,
271 |     {
272 |         if !self.deserialize_field::<bool>(1)? {
273 |             return visitor.visit_none();
274 |         }
275 |         visitor.visit_some(self.field_deserializer(2))
276 |     }
277 | 
278 |     fn deserialize_struct<V>(
279 |         self,
280 |         _name: &'static str,
281 |         _fields: &'static [&'static str],
282 |         visitor: V,
283 |     ) -> Result<V::Value>
284 |     where
285 |         V: de::Visitor<'de>,
286 |     {
287 |         visitor.visit_map(StructDeserializer {
288 |             de: self,
289 |             field_id: 0,
290 |         })
291 |     }
292 | 
293 |     fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
294 |     where
295 |         V: de::Visitor<'de>,
296 |     {
297 |         visitor.visit_unit()
298 |     }
299 | 
300 |     forward_to_deserialize_any! {
301 |         i8 i16 i32 i64 i128 u8 u16 u128 f32 f64 char str string
302 |         unit unit_struct newtype_struct tuple
303 |         tuple_struct enum identifier
304 |     }
305 | }
306 | 
307 | struct StructDeserializer<'i, 'de> {
308 |     de: Deserializer<'i, 'de>,
309 |     field_id: usize,
310 | }
311 | 
312 | impl<'de> de::MapAccess<'de> for StructDeserializer<'_, 'de> {
313 |     type Error = Error;
314 | 
315 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
316 |     where
317 |         K: de::DeserializeSeed<'de>,
318 |     {
319 |         let Some(layout) = self.de.layout else {
320 |             return Ok(None);
321 |         };
322 | 
323 |         let fields = &layout.fields.0;
324 |         while self.field_id < fields.len() {
325 |             if fields[self.field_id].is_some() {
326 |                 // Map 1.. to 0.. for serde.
327 |                 let serde_field_id = self.field_id as u64 - 1;
328 |                 return seed
329 |                     .deserialize(de::value::U64Deserializer::new(serde_field_id))
330 |                     .map(Some);
331 |             }
332 |             self.field_id += 1;
333 |         }
334 |         Ok(None)
335 |     }
336 | 
337 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
338 |     where
339 |         V: de::DeserializeSeed<'de>,
340 |     {
341 |         self.field_id += 1;
342 |         seed.deserialize(self.de.field_deserializer(self.field_id as i16 - 1))
343 |     }
344 | }
345 | 
346 | struct CollectionDeserializer<'a, 'de> {
347 |     elem_de: Deserializer<'a, 'de>,
348 |     len: u32,
349 | }
350 | 
351 | impl<'de> de::SeqAccess<'de> for CollectionDeserializer<'_, 'de> {
352 |     type Error = Error;
353 | 
354 |     fn size_hint(&self) -> Option<usize> {
355 |         self.len.try_into().ok()
356 |     }
357 | 
358 |     fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
359 |     where
360 |         T: de::DeserializeSeed<'de>,
361 |     {
362 |         if self.len == 0 {
363 |             return Ok(None);
364 |         }
365 | 
366 |         let ret = seed.deserialize(self.elem_de);
367 |         self.len -= 1;
368 |         if let Some(layout) = self.elem_de.layout {
369 |             self.elem_de.bit_offset += layout.bits as Offset;
370 |         }
371 |         ret.map(Some)
372 |     }
373 | }
374 | 
375 | impl<'de> de::MapAccess<'de> for CollectionDeserializer<'_, 'de> {
376 |     type Error = Error;
377 | 
378 |     fn size_hint(&self) -> Option<usize> {
379 |         self.len.try_into().ok()
380 |     }
381 | 
382 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
383 |     where
384 |         K: de::DeserializeSeed<'de>,
385 |     {
386 |         if self.len == 0 {
387 |             return Ok(None);
388 |         }
389 |         self.len -= 1;
390 | 
391 |         seed.deserialize(self.elem_de.field_deserializer(1))
392 |             .map(Some)
393 |     }
394 | 
395 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
396 |     where
397 |         V: de::DeserializeSeed<'de>,
398 |     {
399 |         let ret = seed.deserialize(self.elem_de.field_deserializer(2));
400 |         if let Some(layout) = self.elem_de.layout {
401 |             self.elem_de.bit_offset += layout.bits as Offset;
402 |         }
403 |         ret
404 |     }
405 | }
406 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/de_thrift.rs:
--------------------------------------------------------------------------------
1 | //! Dwarven thrift, with fbthrift flavor.
2 | //!
3 | //! This implements just enough features to handle the DwarFS schema type (Frozen 2 schema).
4 | //! It is not, and will never be, standard compliant.
5 | //! Supported types: struct, map, string, bool, i16, i32, u32 (map/string length).
6 | //!
7 | //! Currently it will reject unsupported types and thus is not future-proof. I'm not
8 | //! expecting this to change in the near future; DwarFS, as an on-disk format,
9 | //! should not eagerly update its Frozen dependency.
10 | //!
11 | //! Frozen 2 schema:
12 | //!
13 | //! Thrift-compact spec:
14 | //! NB. fbthrift has different handling of varints, which seems to be incompatible with Apache Thrift.
15 | use serde::{de, forward_to_deserialize_any};
16 | 
17 | type Result<T, E = Error> = std::result::Result<T, E>;
18 | type Error = de::value::Error;
19 | 
20 | pub(crate) fn deserialize_struct<'de, T: de::Deserialize<'de>>(input: &'de [u8]) -> Result<T> {
21 |     let mut de = ValueDeserializer {
22 |         rest: input,
23 |         typ: Tag::Struct,
24 |     };
25 |     let v = T::deserialize(&mut de)?;
26 |     if !de.rest.is_empty() {
27 |         return Err(de::Error::custom(format_args!(
28 |             "unexpected trailing bytes at {}",
29 |             input.len() - de.rest.len(),
30 |         )));
31 |     }
32 |     Ok(v)
33 | }
34 | 
35 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
36 | #[repr(u8)]
37 | pub(crate) enum Tag {
38 |     BoolTrue = 1,
39 |     BoolFalse = 2,
40 |     I16 = 4,
41 |     I32 = 5,
42 |     Binary = 8,
43 |     Map = 11,
44 |     Struct = 12,
45 | 
46 |     // Pseudo tags.
47 |     UnknownBool = 0,
48 |     Invalid = 15,
49 | }
50 | 
51 | impl Tag {
52 |     fn without_inline_bool(self) -> Self {
53 |         if let Self::BoolTrue | Self::BoolFalse = self {
54 |             Self::UnknownBool
55 |         } else {
56 |             self
57 |         }
58 |     }
59 | }
60 | 
61 | impl TryFrom<u8> for Tag {
62 |     type Error = Error;
63 | 
64 |     fn try_from(typ: u8) -> Result<Self> {
65 |         Ok(match typ {
66 |             1 => Tag::BoolTrue,
67 |             2 => Tag::BoolFalse,
68 |             // 3: i8
69 |             4 => Tag::I16,
70 |             5 => Tag::I32,
71 |             // 6: i64
72 |             // 7: double
73 |             8 => Tag::Binary,
74 |             // 9: list
75 |             // 10: set
76 |             11 => Tag::Map,
77 |             12 => Tag::Struct,
78 |             // 13: float
79 |             _ => {
80 |                 return Err(de::Error::custom(format_args!(
81 |                     "invalid or unsupported type tag: {typ:#x}"
82 |                 )));
83 |             }
84 |         })
85 |     }
86 | }
87 | 
88 | struct ValueDeserializer<'de> {
89 |     rest: &'de [u8],
90 |     typ: Tag,
91 | }
92 | 
93 | impl<'de> ValueDeserializer<'de> {
94 |     fn eat_byte(&mut self) -> Result<u8> {
95 |         let (&fst, rest) = self
96 |             .rest
97 |             .split_first()
98 |             .ok_or_else(|| de::Error::custom("unexpected EOF"))?;
99 |         self.rest = rest;
100 |         Ok(fst)
101 |     }
102 | 
103 |     fn eat_varint(&mut self) -> Result<u32> {
104 |         let mut x = 0u32;
105 |         for i in 0..5 {
106 |             let b = self.eat_byte()?;
107 |             x += u32::from(b & 0x7F) << (i * 7);
108 |             if b & 0x80 == 0 {
109 |                 return Ok(x);
110 |             }
111 |         }
112 |         Err(de::Error::custom("encoded varint is too long"))
113 |     }
114 | 
115 |     fn eat_zigzag(&mut self) -> Result<i32> {
116 |         let x = self.eat_varint()?;
117 |         Ok((x >> 1) as i32 ^ -(x as i32 & 1))
118 |     }
119 | }
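
// --- Added example (not part of the upstream sources): zigzag decoding ---
// A quick check of `eat_zigzag` above: zigzag maps 0, -1, 1, -2, 2, ... to
// 0, 1, 2, 3, 4, ..., so the varint bytes below decode as shown.
#[cfg(test)]
mod zigzag_example {
    use super::{Tag, ValueDeserializer};

    fn decode(bytes: &[u8]) -> i32 {
        let mut de = ValueDeserializer { rest: bytes, typ: Tag::I32 };
        de.eat_zigzag().unwrap()
    }

    #[test]
    fn decodes_zigzag_varints() {
        assert_eq!(decode(&[0x00]), 0);
        assert_eq!(decode(&[0x01]), -1);
        assert_eq!(decode(&[0x02]), 1);
        // Multi-byte varint: 0xAC 0x02 is 300, which unzigzags to 150.
        assert_eq!(decode(&[0xAC, 0x02]), 150);
    }
}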

120 | 
121 | impl<'de> de::Deserializer<'de> for &mut ValueDeserializer<'de> {
122 |     type Error = Error;
123 | 
124 |     fn is_human_readable(&self) -> bool {
125 |         false
126 |     }
127 | 
128 |     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
129 |     where
130 |         V: de::Visitor<'de>,
131 |     {
132 |         match self.typ {
133 |             Tag::UnknownBool => visitor.visit_bool(match self.eat_byte()? {
134 |                 1 => true,
135 |                 2 => false,
136 |                 x => {
137 |                     return Err(de::Error::custom(format_args!(
138 |                         "invalid value for bool: {x:#x}"
139 |                     )));
140 |                 }
141 |             }),
142 |             Tag::BoolTrue => visitor.visit_bool(true),
143 |             Tag::BoolFalse => visitor.visit_bool(false),
144 |             Tag::I16 | Tag::I32 => visitor.visit_i32(self.eat_zigzag()?),
145 |             Tag::Binary => {
146 |                 let len = self.eat_varint()?;
147 |                 // If it overflows, it will fail on the next slicing anyway.
148 |                 let len = usize::try_from(len).unwrap_or(usize::MAX);
149 |                 let (data, rest) = self
150 |                     .rest
151 |                     .split_at_checked(len)
152 |                     .ok_or_else(|| de::Error::custom("input data is too short"))?;
153 |                 self.rest = rest;
154 |                 visitor.visit_borrowed_bytes(data)
155 |             }
156 |             Tag::Map => {
157 |                 let len = self.eat_varint()?;
158 |                 let (ktype, vtype) = if len == 0 {
159 |                     (Tag::Invalid, Tag::Invalid)
160 |                 } else {
161 |                     let typ = self.eat_byte()?;
162 |                     let ktype = Tag::try_from(typ >> 4)?.without_inline_bool();
163 |                     let vtype = Tag::try_from(typ & 0xF)?.without_inline_bool();
164 |                     (ktype, vtype)
165 |                 };
166 |                 visitor.visit_map(MapDeserializer {
167 |                     de: self,
168 |                     len,
169 |                     ktype,
170 |                     vtype,
171 |                 })
172 |             }
173 |             Tag::Struct => visitor.visit_map(StructDeserializer {
174 |                 de: self,
175 |                 field_id: 0,
176 |                 value_type: Tag::Invalid,
177 |             }),
178 | 
179 |             Tag::Invalid => unreachable!(),
180 |         }
181 |     }
182 | 
183 |     forward_to_deserialize_any! {
184 |         bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
185 |         bytes byte_buf option unit unit_struct newtype_struct seq tuple
186 |         tuple_struct map struct enum identifier ignored_any
187 |     }
188 | }
189 | 
190 | struct StructDeserializer<'a, 'de> {
191 |     de: &'a mut ValueDeserializer<'de>,
192 |     field_id: i16,
193 |     value_type: Tag,
194 | }
195 | 
196 | impl<'de> de::MapAccess<'de> for StructDeserializer<'_, 'de> {
197 |     type Error = Error;
198 | 
199 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
200 |     where
201 |         K: de::DeserializeSeed<'de>,
202 |     {
203 |         let b = self.de.eat_byte()?;
204 |         if b == 0 {
205 |             return Ok(None);
206 |         }
207 | 
208 |         let id_delta = i16::from(b >> 4);
209 |         self.field_id = if id_delta != 0 {
210 |             self.field_id.checked_add(id_delta)
211 |         } else {
212 |             i16::try_from(self.de.eat_zigzag()?).ok()
213 |         }
214 |         .ok_or_else(|| de::Error::custom("field id overflow"))?;
215 | 
216 |         self.value_type = Tag::try_from(b & 0xF)?;
217 | 
218 |         // Map range 1.. to 0.. for serde.
219 |         let field_id = (self.field_id - 1) as u64;
220 |         seed.deserialize(de::value::U64Deserializer::new(field_id))
221 |             .map(Some)
222 |     }
223 | 
224 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
225 |     where
226 |         V: de::DeserializeSeed<'de>,
227 |     {
228 |         let prev_typ = std::mem::replace(&mut self.de.typ, self.value_type);
229 |         let v = seed.deserialize(&mut *self.de);
230 |         self.de.typ = prev_typ;
231 |         v
232 |     }
233 | }
234 | 
235 | struct MapDeserializer<'a, 'de> {
236 |     de: &'a mut ValueDeserializer<'de>,
237 |     len: u32,
238 |     ktype: Tag,
239 |     vtype: Tag,
240 | }
241 | 
242 | impl<'de> de::MapAccess<'de> for MapDeserializer<'_, 'de> {
243 |     type Error = Error;
244 | 
245 |     fn size_hint(&self) -> Option<usize> {
246 |         usize::try_from(self.len).ok()
247 |     }
248 | 
249 |     fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
250 |     where
251 |         K: de::DeserializeSeed<'de>,
252 |     {
253 |         if self.len == 0 {
254 |             return Ok(None);
255 |         }
256 |         self.len -= 1;
257 | 
258 |         let prev_typ = std::mem::replace(&mut self.de.typ, self.ktype);
259 |         let k = seed.deserialize(&mut *self.de);
260 |         self.de.typ = prev_typ;
261 |         k.map(Some)
262 |     }
263 | 
264 |     fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
265 |     where
266 |         V: de::DeserializeSeed<'de>,
267 |     {
268 |         let prev_typ = std::mem::replace(&mut self.de.typ, self.vtype);
269 |         let v = seed.deserialize(&mut *self.de);
270 |         self.de.typ = prev_typ;
271 |         v
272 |     }
273 | }
274 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/ser_thrift.rs:
--------------------------------------------------------------------------------
1 | //! The minimal serialization support for minithrift,
2 | //! specialized for the DwarFS schema type only.
3 | //!
4 | //! See [`super::de_thrift`] for more details.
5 | use serde::{de, ser};
6 | 
7 | use super::de_thrift::Tag;
8 | 
9 | type Result<T, E = Error> = std::result::Result<T, E>;
10 | type Error = de::value::Error;
11 | 
12 | pub(crate) fn serialize_struct<T: ser::Serialize>(input: &T) -> Result<Vec<u8>> {
13 |     // TODO: Set a good default capacity here.
14 |     let mut out = Vec::new();
15 |     input.serialize(ValueSerializer {
16 |         w: &mut out,
17 |         inline_bool: false,
18 |     })?;
19 |     Ok(out)
20 | }
21 | 
22 | pub(crate) struct ValueSerializer<'w> {
23 |     w: &'w mut Vec<u8>,
24 |     inline_bool: bool,
25 | }
26 | 
27 | impl ValueSerializer<'_> {
28 |     fn write_varint(&mut self, mut v: u32) {
29 |         loop {
30 |             let more = v >> 7;
31 |             let has_more = more > 0;
32 |             self.w.push((v as u8 & 0x7F) | ((has_more as u8) << 7));
33 |             v = more;
34 |             if !has_more {
35 |                 break;
36 |             }
37 |         }
38 |     }
39 | 
40 |     fn write_zigzag(&mut self, v: i32) {
41 |         self.write_varint((v << 1 ^ (v >> 31)) as u32);
42 |     }
43 | }
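
// --- Added example (not part of the upstream sources): varint encoding ---
// `write_varint` above emits 7 bits per byte, low bits first, setting the
// high bit on every byte except the last; this sketch pins down the wire
// bytes for a few values.
#[cfg(test)]
mod varint_example {
    use super::ValueSerializer;

    fn encode(v: u32) -> Vec<u8> {
        let mut out = Vec::new();
        ValueSerializer { w: &mut out, inline_bool: false }.write_varint(v);
        out
    }

    #[test]
    fn encodes_varints() {
        assert_eq!(encode(0), [0x00]);
        assert_eq!(encode(127), [0x7F]);
        // 300 = 0b10_0101100: low 7 bits (0x2C) with the continuation bit
        // set, then 0x02.
        assert_eq!(encode(300), [0xAC, 0x02]);
    }
}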

44 | 
45 | impl<'w> ser::Serializer for ValueSerializer<'w> {
46 |     type Ok = Tag;
47 |     type Error = Error;
48 |     type SerializeSeq = ser::Impossible<Tag, Error>;
49 |     type SerializeTuple = ser::Impossible<Tag, Error>;
50 |     type SerializeTupleStruct = ser::Impossible<Tag, Error>;
51 |     type SerializeTupleVariant = ser::Impossible<Tag, Error>;
52 |     type SerializeMap = MapSerializer<'w>;
53 |     type SerializeStruct = StructSerializer<'w>;
54 |     type SerializeStructVariant = ser::Impossible<Tag, Error>;
55 | 
56 |     fn serialize_bool(self, v: bool) -> Result<Tag> {
57 |         let tag = if v { Tag::BoolTrue } else { Tag::BoolFalse };
58 |         if self.inline_bool {
59 |             Ok(tag)
60 |         } else {
61 |             self.w.push(tag as u8);
62 |             // TODO: Check behavior of fbthrift on this.
63 |             Ok(Tag::BoolTrue)
64 |         }
65 |     }
66 | 
67 |     fn serialize_i16(mut self, v: i16) -> Result<Tag> {
68 |         self.write_zigzag(v.into());
69 |         Ok(Tag::I16)
70 |     }
71 | 
72 |     fn serialize_i32(mut self, v: i32) -> Result<Tag> {
73 |         self.write_zigzag(v);
74 |         Ok(Tag::I32)
75 |     }
76 | 
77 |     fn serialize_str(mut self, s: &str) -> Result<Tag> {
78 |         let len = u32::try_from(s.len())
79 |             .map_err(|_| ser::Error::custom("string length exceeds u32 range"))?;
80 |         self.write_varint(len);
81 |         self.w.extend_from_slice(s.as_bytes());
82 |         Ok(Tag::Binary)
83 |     }
84 | 
85 |     fn serialize_struct(self, _name: &'static str, _len: usize) -> Result<Self::SerializeStruct> {
86 |         Ok(StructSerializer {
87 |             w: self.w,
88 |             field_id_diff_tag: 0x10,
89 |         })
90 |     }
91 | 
92 |     fn serialize_map(mut self, len: Option<usize>) -> Result<Self::SerializeMap> {
93 |         let len = len
94 |             .and_then(|len| u32::try_from(len).ok())
95 |             .expect("map must have known u32 size");
96 |         self.write_varint(len);
97 |         Ok(MapSerializer {
98 |             type_pos: self.w.len(),
99 |             w: self.w,
100 |             ktype: None,
101 |             vtype: None,
102 |         })
103 |     }
104 | 
105 |     //// Not needed ////
106 | 
107 |     fn serialize_i8(self, _: i8) -> Result<Tag> {
108 |         unimplemented!()
109 |     }
110 | 
111 |     fn serialize_i64(self, _: i64) -> Result<Tag> {
112 |         unimplemented!()
113 |     }
114 | 
115 |     fn serialize_u8(self, _: u8) -> Result<Tag> {
116 |         unimplemented!()
117 |     }
118 | 
119 |     fn serialize_u16(self, _: u16) -> Result<Tag> {
120 |         unimplemented!()
121 |     }
122 | 
123 |     fn serialize_u32(self, _: u32) -> Result<Tag> {
124 |         unimplemented!()
125 |     }
126 | 
127 |     fn serialize_u64(self, _: u64) -> Result<Tag> {
128 |         unimplemented!()
129 |     }
130 | 
131 |     fn serialize_f32(self, _: f32) -> Result<Tag> {
132 |         unimplemented!()
133 |     }
134 | 
135 |     fn serialize_f64(self, _: f64) -> Result<Tag> {
136 |         unimplemented!()
137 |     }
138 | 
139 |     fn serialize_char(self, _: char) -> Result<Tag> {
140 |         unimplemented!()
141 |     }
142 | 
143 |     fn serialize_bytes(self, _: &[u8]) -> Result<Tag> {
144 |         unimplemented!()
145 |     }
146 | 
147 |     fn serialize_none(self) -> Result<Tag> {
148 |         unimplemented!()
149 |     }
150 | 
151 |     fn serialize_some<T>(self, _value: &T) -> Result<Tag>
152 |     where
153 |         T: ?Sized + ser::Serialize,
154 |     {
155 |         unimplemented!()
156 |     }
157 | 
158 |     fn serialize_unit(self) -> Result<Tag> {
159 |         unimplemented!()
160 |     }
161 | 
162 |     fn serialize_unit_struct(self, _name: &'static str) -> Result<Tag> {
163 |         unimplemented!()
164 |     }
165 | 
166 |     fn serialize_unit_variant(
167 |         self,
168 |         _name: &'static str,
169 |         _variant_index: u32,
170 |         _variant: &'static str,
171 |     ) -> Result<Tag> {
172 |         unimplemented!()
173 |     }
174 | 
175 |     fn serialize_newtype_struct<T>(self, _name: &'static str, _value: &T) -> Result<Tag>
176 |     where
177 |         T: ?Sized + ser::Serialize,
178 |     {
179 |         unimplemented!()
180 |     }
181 | 
182 |     fn serialize_newtype_variant<T>(
183 |         self,
184 |         _name: &'static str,
185 |         _variant_index: u32,
186 |         _variant: &'static str,
187 |         _value: &T,
188 |     ) -> Result<Tag>
189 |     where
190 |         T: ?Sized + ser::Serialize,
191 |     {
192 |         unimplemented!()
193 |     }
194 | 
195 |     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
196 |         unimplemented!()
197 |     }
198 | 
199 |     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple> {
200 |         unimplemented!()
201 |     }
202 | 
203 |     fn serialize_tuple_struct(
204 |         self,
205 |         _name: &'static str,
206 |         _len: usize,
207 |     ) -> Result<Self::SerializeTupleStruct> {
208 |         unimplemented!()
209 |     }
210 | 
211 |     fn serialize_tuple_variant(
212 |         self,
213 |         _name: &'static str,
214 |         _variant_index: u32,
215 |         _variant: &'static str,
216 |         _len: usize,
217 |     ) -> Result<Self::SerializeTupleVariant> {
218 |         unimplemented!()
219 |     }
220 | 
221 |     fn serialize_struct_variant(
222 |         self,
223 |         _name: &'static str,
224 |         _variant_index: u32,
225 |         _variant: &'static str,
226 |         _len: usize,
227 |     ) -> Result<Self::SerializeStructVariant> {
228 |         unimplemented!()
229 |     }
230 | }
231 | 
232 | pub(crate) struct StructSerializer<'w> {
233 |     w: &'w mut Vec<u8>,
234 |     /// `0bxxxx0000` where `xxxx` is the `field_id` delta from the previous field.
235 |     field_id_diff_tag: u8,
236 | }
237 | 
238 | impl ser::SerializeStruct for StructSerializer<'_> {
239 |     type Ok = Tag;
240 |     type Error = Error;
241 | 
242 |     fn skip_field(&mut self, _key: &'static str) -> Result<()> {
243 |         self.field_id_diff_tag = self
244 |             .field_id_diff_tag
245 |             .checked_add(0x10)
246 |             .expect("field count overflow");
247 |         Ok(())
248 |     }
249 | 
250 |     fn serialize_field<T>(&mut self, _key: &'static str, value: &T) -> Result<()>
251 |     where
252 |         T: ?Sized + ser::Serialize,
253 |     {
254 |         // Field id & type.
255 |         let pos = self.w.len();
256 |         self.w.push(0);
257 | 
258 |         let tag = value.serialize(ValueSerializer {
259 |             w: self.w,
260 |             inline_bool: true,
261 |         })?;
262 |         self.w[pos] = self.field_id_diff_tag | tag as u8;
263 |         self.field_id_diff_tag = 0x10;
264 |         Ok(())
265 |     }
266 | 
267 |     fn end(self) -> Result<Tag> {
268 |         self.w.push(0);
269 |         Ok(Tag::Struct)
270 |     }
271 | }
272 | 
273 | pub(crate) struct MapSerializer<'w> {
274 |     w: &'w mut Vec<u8>,
275 |     type_pos: usize,
276 |     ktype: Option<Tag>,
277 |     vtype: Option<Tag>,
278 | }
279 | 
280 | impl ser::SerializeMap for MapSerializer<'_> {
281 |     type Ok = Tag;
282 |     type Error = Error;
283 | 
284 |     fn serialize_key<T>(&mut self, key: &T) -> Result<()>
285 |     where
286 |         T: ?Sized + ser::Serialize,
287 |     {
288 |         if self.ktype.is_none() {
289 |             // Reserve a type byte.
290 |             self.w.push(0);
291 |         }
292 | 
293 |         let tag = key.serialize(ValueSerializer {
294 |             w: self.w,
295 |             inline_bool: false,
296 |         })?;
297 |         let prev = *self.ktype.get_or_insert(tag);
298 |         debug_assert_eq!(prev, tag);
299 |         Ok(())
300 |     }
301 | 
302 |     fn serialize_value<T>(&mut self, value: &T) -> Result<()>
303 |     where
304 |         T: ?Sized + ser::Serialize,
305 |     {
306 |         let tag = value.serialize(ValueSerializer {
307 |             w: self.w,
308 |             inline_bool: false,
309 |         })?;
310 |         let prev = *self.vtype.get_or_insert(tag);
311 |         debug_assert_eq!(prev, tag);
312 |         Ok(())
313 |     }
314 | 
315 |     fn end(self) -> Result<Tag> {
316 |         // This condition is false if the map contains zero elements.
317 |         if self.type_pos < self.w.len() {
318 |             self.w[self.type_pos] = (self.ktype.unwrap() as u8) << 4 | self.vtype.unwrap() as u8;
319 |         }
320 |         Ok(Tag::Map)
321 |     }
322 | }
323 | 
--------------------------------------------------------------------------------
/dwarfs/src/metadata/tests.rs:
--------------------------------------------------------------------------------
1 | use super::*;
2 | 
3 | #[test]
4 | #[cfg(feature = "serialize")]
5 | fn serde_schema() {
6 |     let schema = Schema {
7 |         relax_type_checks: true,
8 |         layouts: DenseMap(vec![Some(SchemaLayout {
9 |             size: 1,
10 |             bits: 0,
11 |             fields: DenseMap::default(),
12 |             type_name: String::new(),
13 |         })]),
14 |         root_layout: 0,
15 |         file_version: 1,
16 |     };
17 |     let bytes = schema.to_bytes().unwrap();
18 | 
19 |     let expect = [
20 |         // struct
21 |         0x11, // field `relax_type_checks` (id = 0 + 1 = 1), value = true
22 |         0x1b, // field `layouts` (id = 1 + 1 = 2), type = 0xb map
23 |         0x01, // map size = 1
24 |         0x4c, // key = i16, value = struct
25 |         0x00, // key i16 = 0 = zigzag(0)
26 |         /**/  // value struct
27 |         0x15, // field `size` (id = 0 + 1 = 1)
28 |         0x02, // 2 = zigzag(1)
29 |         0x2b, // field `fields` (id = 1 + 2 = 3), type = 0xb map
30 |         0x00, // len = 0
31 |         0x18, // field `type_name` (id = 3 + 1 = 4), type = 0x8 binary
32 |         0x00, // len = 0
33 |         0x00, // struct end
34 |         0x25, // field `file_version` (id = 2 + 2 = 4), type = 0x5 i32
35 |         0x02, // 2 = zigzag(1)
36 |         0x00, // struct end
37 |     ];
38 |     assert_eq!(bytes, expect);
39 | 
40 |     let got = Schema::parse(&bytes).unwrap();
41 |     assert_eq!(got, schema);
42 | }
43 | 
44 | #[test]
45 | fn de_frozen() {
46 |     #[derive(Debug, PartialEq, Eq, Deserialize)]
47 |     struct Pair {
48 |         a: u32,
49 |         #[serde(default)]
50 |         b: u32,
51 |         c: u32,
52 |     }
53 | 
54 |     let schema = Schema {
55 |         relax_type_checks: true,
56 |         layouts: DenseMap(vec![
57 |             None,
58 |             Some(SchemaLayout {
59 |                 size: 0,
60 |                 bits: 8,
61 |                 fields: DenseMap(vec![
62 |                     None,
63 |                     Some(SchemaField {
64 |                         layout_id: 2,
65 |                         offset: 0,
66 |                     }),
67 |                     None,
68 |                     Some(SchemaField {
69 |                         layout_id: 2,
70 |                         offset: -4,
71 |                     }),
72 |                 ]),
73 |                 type_name: String::new(),
74 |             }),
75 |             Some(SchemaLayout {
76 |                 size: 0,
77 |                 bits: 4,
78 |                 fields: DenseMap::default(),
79 |                 type_name: String::new(),
80 |             }),
81 |         ]),
82 |         root_layout: 1,
83 |         file_version: 1,
84 |     };
85 | 
86 |     let de = de_frozen::deserialize::<Pair>(&schema, b"\x42\0\0\0\0\0\0\0").unwrap();
87 |     assert_eq!(
88 |         de,
89 |         Pair {
90 |             a: 0x2,
91 |             b: 0,
92 |             c: 0x4
93 |         }
94 |     );
95 | }
96 | 
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 |   "nodes": {
3 |     "nixpkgs": {
4 |       "locked": {
5 |         "lastModified": 1748186667,
6 |         "narHash": "sha256-UQubDNIQ/Z42R8tPCIpY+BOhlxO8t8ZojwC9o2FW3c8=",
7 |         "owner": "NixOS",
8 |         "repo": "nixpkgs",
9 |         "rev": "bdac72d387dca7f836f6ef1fe547755fb0e9df61",
10 |         "type": "github"
11 |       },
12 |       "original": {
13 |         "owner": "NixOS",
14 |         "ref": "nixpkgs-unstable",
15 |         "repo": "nixpkgs",
16 |         "type": "github"
17 |       }
18 |     },
19 |     "root": {
20 |       "inputs": {
21 |         "nixpkgs": "nixpkgs"
22 |       }
23 |     }
24 |   },
25 |   "root": "root",
26 |   "version": 7
27 | }
28 | 
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
1 | {
2 |   inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
3 | 
4 |   outputs =
5 |     { self, nixpkgs }:
6 |     let
inherit (nixpkgs) lib; 8 | eachSystems = 9 | f: lib.genAttrs lib.systems.flakeExposed (system: f system nixpkgs.legacyPackages.${system}); 10 | in 11 | { 12 | devShells = eachSystems ( 13 | system: pkgs: rec { 14 | without-rust = pkgs.mkShell { 15 | nativeBuildInputs = [ 16 | pkgs.dwarfs 17 | pkgs.fakeroot 18 | ]; 19 | }; 20 | } 21 | ); 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /typos.toml: -------------------------------------------------------------------------------- 1 | [default.extend-words] 2 | # A workaround for keyword "type". 3 | typ = "typ" --------------------------------------------------------------------------------