├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── doc ├── chunking.md ├── index.md └── metadata.md ├── docs ├── .gitignore ├── _config.yml ├── _posts │ └── 2023-10-30-Linux-kernel-setup.md └── index.md ├── exe ├── .gitignore ├── Cargo.toml ├── src │ └── main.rs └── tests │ ├── cleanup_verity_device.sh │ ├── extract.rs │ ├── helpers │ └── mod.rs │ ├── setup_verity_device.sh │ ├── verity.rs │ └── verity_setup │ └── mod.rs ├── puzzlefs-lib ├── Cargo.toml ├── build.rs └── src │ ├── builder.rs │ ├── builder │ ├── filesystem.rs │ └── test │ │ └── test-1 │ │ └── SekienAkashita.jpg │ ├── common.rs │ ├── compression.rs │ ├── compression │ ├── noop.rs │ └── zstd_seekable_wrapper.rs │ ├── extractor.rs │ ├── format.rs │ ├── format │ ├── error.rs │ ├── metadata.capnp │ └── types.rs │ ├── fsverity_helpers.rs │ ├── lib.rs │ ├── oci.rs │ ├── oci │ └── media_types.rs │ ├── reader.rs │ └── reader │ ├── fuse.rs │ ├── puzzlefs.rs │ └── walk.rs └── rust-toolchain.toml /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: '0 0 * * 0' # weekly 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: actions-rs/toolchain@v1 15 | with: 16 | toolchain: nightly 17 | components: clippy, rustfmt 18 | - name: install dependencies 19 | run: | 20 | sudo apt-get install skopeo umoci capnproto 21 | - run: make lint check 22 | - name: publish 23 | if: startsWith(github.ref, 'refs/tags/') && github.event_name == 'push' 24 | run: | 25 | cargo publish -p puzzlefs-lib --token ${CRATES_TOKEN} 26 | cargo publish -p puzzlefs --token ${CRATES_TOKEN} 27 | env: 28 | CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }} 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "puzzlefs-lib", 5 | "exe", 6 | ] 7 | 8 | resolver = "2" 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SRC=$(shell find . 
-name \*.rs | grep -v "^./target") 2 | PREFIX?=/usr/local 3 | ROOT_SBINDIR?=$(PREFIX)/sbin 4 | INSTALL=install 5 | 6 | .PHONY: release 7 | release: 8 | cargo build --release 9 | 10 | .PHONY: debug 11 | debug: 12 | cargo build 13 | 14 | .PHONY: check 15 | check: 16 | RUST_BACKTRACE=1 cargo test -- --nocapture 17 | 18 | .PHONY: lint 19 | lint: $(SRC) 20 | rustfmt --check $(SRC) 21 | cargo clippy --all-targets --all-features -- -D warnings -D rust-2018-idioms -D rust-2021-compatibility -A clippy::upper-case-acronyms 22 | 23 | .PHONY: fmt 24 | fmt: 25 | rustfmt --emit files $(SRC) 26 | 27 | .PHONY: clean 28 | clean: 29 | -cargo clean 30 | 31 | install: 32 | $(INSTALL) -m0755 -D target/release/puzzlefs -t $(ROOT_SBINDIR) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PuzzleFS [![Build Status](https://github.com/project-machine/puzzlefs/workflows/ci/badge.svg?branch=master)](https://github.com/project-machine/puzzlefs/actions) [![Crates.io][crates-exe-badge]][crates-exe-url] 2 | 3 | [crates-exe-badge]: https://img.shields.io/crates/v/puzzlefs.svg 4 | [crates-exe-url]: https://crates.io/crates/puzzlefs 5 | 6 | PuzzleFS is a next-generation container filesystem. 7 | 8 | ## Design Goals 9 | 10 | * Do computation when we want to, i.e.: 11 | * Image building should be fast 12 | * Image mounting/reading should be fast 13 | * Optional "canonicalization" step in the middle 14 | * No full-tree walk required 15 | * mtree style walks of filesystems are not necessary with clever use of 16 | overlay 17 | * casync style generate-a-tar-then-diff is more for general purpose use 18 | where you don't want to have a special filesystem setup 19 | * Be simple enough to decode in the kernel 20 | * A primary motivator for our working on this at Cisco is direct-mount 21 | support 22 | 23 | ## Abstract 24 | Puzzlefs is a container filesystem designed to address the limitations of the 25 | existing OCI format. The main goals of the project are reduced duplication, 26 | reproducible image builds, direct mounting support and memory safety 27 | guarantees, some inspired by the 28 | [OCIv2](https://hackmd.io/@cyphar/ociv2-brainstorm) design document. 29 | 30 | Reduced duplication is achieved using the content defined chunking algorithm 31 | FastCDC. This implementation allows chunks to be shared among layers. Building 32 | a new layer starting from an existing one allows reusing most of the chunks. 33 | 34 | Another goal of the project is reproducible image builds, which is achieved by 35 | defining a canonical representation of the image format. 36 | 37 | Direct mounting support is a key feature of puzzlefs and, together with 38 | fs-verity, it provides data integrity. Currently, puzzlefs is implemented as a 39 | userspace filesystem (FUSE). A read-only kernel filesystem driver is underway. 40 | 41 | Lastly, memory safety is critical to puzzlefs, leading to the decision to 42 | implement it in Rust. Another goal is to share the same code between user space 43 | and kernel space in order to provide one secure implementation. 44 | 45 | ## OCIv2 Design doc 46 | 47 | https://hackmd.io/@cyphar/ociv2-brainstorm 48 | 49 | For the most part, I think this addresses things there except for two: 50 | 51 | * Explicit Minimal Metadata: this is mostly unaddressed because I didn't think 52 | about it very hard; there's no reason we couldn't just drop e.g. 
block 53 | devices from the spec, or at least add a note about discouraging their use. 54 | Perhaps we should make mtimes and such optional? But then canonicalization 55 | would be harder. Maybe this should be specified at image build time, sort of 56 | like the chunking algorithm is in our design. 57 | 58 | * Lazy fetch support: this seems directly at odds with the "direct mount" 59 | support at least if the direct mount code is to live in the kernel; we 60 | probably don't want to implement lazy fetch directly in the kernel, because 61 | it involves the network and lots of other stuff. However, this would be 62 | relatively easy to do using fuse, which suggests that perhaps we should 63 | choose a good language (e.g. rust :) for the implementation so that we could 64 | use the same code in the kernel and userspace, thus easily supporting this 65 | one. 66 | 67 | ## Getting started 68 | ### Build dependencies 69 | Puzzlefs is written in rust, which you can download from https://www.rust-lang.org/tools/install. 70 | It requires a [nightly toolchain](https://rust-lang.github.io/rustup/concepts/channels.html#working-with-nightly-rust) which you can add with `rustup toolchain install nightly`. 71 | 72 | The [capnp tool](https://capnproto.org/install.html) is required for 73 | autogenerating rust code from the capnproto schema language. This is done at 74 | build time using the [capnpc crate](https://docs.rs/capnpc/latest/capnpc/). 75 | 76 | ### How to build 77 | Run `make` (or `cargo build`) for the debug build and `make release` (`cargo build --release`) for the release build. The 78 | resulting binaries are in `target/debug/puzzlefs` and 79 | `target/release/puzzlefs`, respectively. 80 | 81 | ### Running tests 82 | To run the tests, run `make check`. 83 | 84 | The tests require 85 | [skopeo](https://github.com/containers/skopeo/blob/main/install.md) and 86 | [umoci](https://umo.ci/) to be installed. It also requires root to run the 87 | `test_fs_verity` test. 88 | 89 | ### Building a puzzlefs image 90 | To build a puzzlefs image, you need to specify a directory with the root 91 | filesystem you want included in your image. For example: 92 | ``` 93 | $ tree /tmp/example-rootfs 94 | /tmp/example-rootfs 95 | ├── algorithms 96 | │   └── binary-search.txt 97 | └── lorem_ipsum.txt 98 | 99 | 2 directories, 2 files 100 | ``` 101 | 102 | Then run: 103 | ``` 104 | $ cargo run --release -- build /tmp/example-rootfs /tmp/puzzlefs-image:puzzlefs_example 105 | puzzlefs image manifest digest: 9ac9abc098870c55cc61431dae8635806273d8f61274d34bec062560e79dc2f5 106 | ``` 107 | This builds a puzzlefs image with the above root filesystem in `/tmp/puzzlefs-image`, with the tag `puzzlefs_example`. 108 | It also outputs the image's manifest digest, which is useful for verifying the integrity of the image using [fs-verity](https://www.kernel.org/doc/html/next/filesystems/fsverity.html). 109 | 110 | For additional build options, run `puzzlefs build -h`. 111 | 112 | ### Mounting a puzzlefs image 113 | To mount the above puzzlefs image, first we need to create a mountpoint: 114 | ``` 115 | mkdir /tmp/mounted-image 116 | ``` 117 | Then run `puzzlefs mount` with the location of the puzzlefs image, the image tag and the mountpoint: 118 | ``` 119 | $ cargo run --release -- mount /tmp/puzzlefs-image:puzzlefs_example /tmp/mounted-image 120 | ``` 121 | 122 | If everything was successful, you will see a `fuse` entry in the output of `mount`: 123 | ``` 124 | $ mount 125 | ... 
126 | /dev/fuse on /tmp/mounted-image type fuse (rw,nosuid,nodev,relatime,user_id=1000,group_id=1000) 127 | ``` 128 | 129 | and the following message in the journal: 130 | ``` 131 | $ journalctl --since "2 min ago" | grep puzzlefs 132 | Aug 14 10:30:27 archlinux-cisco puzzlefs[55544]: Mounting /tmp/mounted-image 133 | ``` 134 | 135 | The mountpoint also contains the rootfs: 136 | ``` 137 | $ tree /tmp/mounted-image 138 | /tmp/mounted-image 139 | ├── algorithms 140 | │   └── binary-search.txt 141 | └── lorem_ipsum.txt 142 | 143 | 2 directories, 2 files 144 | ``` 145 | 146 | For additional mount options, run `cargo run -- mount -h`. 147 | 148 | ### Mounting with fs-verity enabled 149 | If you want to mount the filesystem with `fs-verity` authenticity protection, first enable `fs-verity` by running: 150 | ``` 151 | $ cargo run --release -- enable-fs-verity /tmp/puzzlefs-image:puzzlefs_example 9ac9abc098870c55cc61431dae8635806273d8f61274d34bec062560e79dc2f5 152 | ``` 153 | This makes the data and metadata files read-only. Any reads of corrupted data will fail. 154 | 155 | Then run `mount` with the `--digest` option: 156 | ``` 157 | $ cargo run --release -- mount --digest 9ac9abc098870c55cc61431dae8635806273d8f61274d34bec062560e79dc2f5 /tmp/puzzlefs-image:puzzlefs_example /tmp/mounted-image 158 | ``` 159 | PuzzleFS now ensures that each file it opens has fs-verity enabled and that the 160 | fs-verity measurement matches the fs-verity data stored in the manifest. The 161 | image manifest's fs-verity digest is compared with the digest passed on the 162 | command line via the `--digest` option. 163 | 164 | This only works if `fsverity` is [supported and 165 | enabled](https://www.kernel.org/doc/html/latest/filesystems/fsverity.html#filesystem-support) 166 | in the underlying filesystem on which the puzzlefs image resides. 
Otherwise 167 | you might get an error like this when running `enable-fs-verity`: 168 | ``` 169 | Error: fs error: Inappropriate ioctl for device (os error 25) 170 | 171 | Caused by: 172 | Inappropriate ioctl for device (os error 25) 173 | ``` 174 | 175 | To check whether fs-verity is enabled, use `tune2fs`: 176 | ``` 177 | $ mount | grep -w '/' 178 | /dev/mapper/MyVolGroup-root on / type ext4 (rw,relatime) 179 | 180 | $ sudo tune2fs -l /dev/mapper/MyVolGroup-root | grep verity 181 | Filesystem features: has_journal ext_attr resize_inode dir_index filetype needs_recovery extent 64bit flex_bg sparse_super large_file huge_file dir_nlink extra_isize metadata_csum verity 182 | ``` 183 | 184 | To set up a 1MB loop device with an ext4 filesystem that supports `fs-verity` 185 | and mount it under `/mnt`, run: 186 | ``` 187 | $ mktemp -u 188 | /tmp/tmp.2CDDHVPLXp 189 | 190 | $ touch /tmp/tmp.2CDDHVPLXp 191 | 192 | $ dd if=/dev/zero of=/tmp/tmp.2CDDHVPLXp bs=1k count=1024 193 | 1024+0 records in 194 | 1024+0 records out 195 | 1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.00203188 s, 516 MB/s 196 | 197 | $ sudo losetup -f --show /tmp/tmp.2CDDHVPLXp 198 | /dev/loop1 199 | 200 | $ sudo mkfs -t ext4 -F -b4096 -O verity /dev/loop1 201 | mke2fs 1.47.0 (5-Feb-2023) 202 | 203 | Filesystem too small for a journal 204 | Discarding device blocks: done 205 | Creating filesystem with 256 4k blocks and 128 inodes 206 | 207 | Allocating group tables: done 208 | Writing inode tables: done 209 | Writing superblocks and filesystem accounting information: done 210 | 211 | $ sudo mount /dev/loop1 /mnt 212 | 213 | $ sudo chown -R $(id -u):$(id -g) /mnt 214 | 215 | $ sudo tune2fs -l /dev/loop1 | grep verity 216 | Filesystem features: ext_attr resize_inode dir_index filetype extent 64bit flex_bg metadata_csum_seed sparse_super large_file huge_file dir_nlink extra_isize metadata_csum verity 217 | ``` 218 | 219 | Now copy the puzzlefs image to `/mnt` and try the verity setup commands again. 220 | 221 | ### Debugging mount issues 222 | When mounting a puzzlefs filesystem in the background (i.e. without the `-f` flag), 223 | errors are logged to the journal, e.g.: 224 | ``` 225 | $ journalctl --since "2 min ago" | grep puzzlefs 226 | Jul 13 18:37:30 archlinux-cisco puzzlefs[305462]: mount_background failed: fs error: fs error: Inappropriate ioctl for device (os error 25) 227 | ``` 228 | For debugging purposes you can use the [RUST_LOG](https://docs.rs/env_logger/latest/env_logger/) environment variable together with the `-f` flag of mount: 229 | ``` 230 | $ RUST_LOG=DEBUG cargo run --release -- mount -f /tmp/puzzlefs-image:puzzlefs_example /tmp/mounted-image 231 | [2023-07-13T16:08:27Z INFO fuser::session] Mounting /tmp/mounted-image 232 | [2023-07-13T16:08:27Z DEBUG fuser::mnt::fuse_pure] fusermount: 233 | [2023-07-13T16:08:27Z DEBUG fuser::mnt::fuse_pure] fusermount: 234 | [2023-07-13T16:08:27Z DEBUG fuser::request] FUSE( 2) ino 0x0000000000000000 INIT kernel ABI 7.38, capabilities 0x73fffffb, max readahead 131072 235 | [2023-07-13T16:08:27Z DEBUG fuser::request] INIT response: ABI 7.8, flags 0x1, max readahead 131072, max write 16777216 236 | ... 237 | ``` 238 | 239 | ### Notification when the mountpoint is ready 240 | #### Foreground mount (`mount -f`) 241 | A named pipe can be passed to the `mount` command. Reading from this pipe is a 242 | blocking operation, waiting until puzzlefs signals that the mountpoint is 243 | ready. 
If the mount operation is successful, the `s` character is written to 244 | the pipe, otherwise `f` is written. This mechanism is inspired by this [squashfuse 245 | issue](https://github.com/vasi/squashfuse/issues/49#issuecomment-785398828). 246 | 247 | The following script shows how to wait until the puzzlefs mountpoint is ready. 248 | The script assumes there is a puzzlefs image available at `/tmp/puzzlefs-image` 249 | and the directory `/tmp/mounted-image` already exists. 250 | ``` 251 | #!/bin/bash 252 | FIFO=$(mktemp -u) 253 | mkfifo "$FIFO" 254 | cargo run --release -- mount -i "$FIFO" -f /tmp/puzzlefs-image:puzzlefs_example /tmp/mounted-image& 255 | STATUS=$(head -c1 "$FIFO") 256 | if [ "$STATUS" = "s" ]; then 257 | echo "Mountpoint contains:" 258 | ls /tmp/mounted-image 259 | else 260 | echo "Mounting puzzlefs on /tmp/mounted-image failed" 261 | fi 262 | ``` 263 | 264 | #### Background mount 265 | When mounting in the background, puzzlefs uses an anonymous pipe to communicate 266 | between its original process and the daemon it spawns in order to wait until 267 | the mountpoint is available. This means that the `puzzlefs mount` command 268 | finishes its execution only after the mountpoint becomes ready. 269 | 270 | ### Unmounting a puzzlefs image 271 | If you have specified the `-f` flag to `mount`, simply press `Ctrl-C`. 272 | 273 | Otherwise, run `fusermount -u /tmp/mounted-image`. You will need to have the `fuse` package installed. 274 | 275 | ### Inspecting a puzzlefs image 276 | ``` 277 | $ cd /tmp/puzzlefs-image 278 | $ cat index.json | jq 279 | { 280 | "manifests": [ 281 | { 282 | "annotations": { 283 | "org.opencontainers.image.ref.name": "puzzlefs_example" 284 | }, 285 | "digest": "sha256:c9106994f5e18833e45164e2028431e9c822b4697172f8a997a0d9a3b0d26c9e", 286 | "mediaType": "application/vnd.oci.image.manifest.v1+json", 287 | "platform": { 288 | "architecture": "amd64", 289 | "os": "linux" 290 | }, 291 | "size": 619 292 | } 293 | ], 294 | "schemaVersion": 2 295 | } 296 | ``` 297 | `index.json` follows the [OCI Image Index Specification](https://github.com/opencontainers/image-spec/blob/main/image-index.md). 298 | 299 | The digest tagged with the `puzzlefs_example` tag is an [OCI Image 300 | Manifest](https://github.com/opencontainers/image-spec/blob/main/manifest.md) 301 | with the caveat that `layers` are not applied in the usual way (i.e. by 302 | stacking them on top of one another). See below for details about the 303 | PuzzleFS `layer` descriptors. 
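For example, the manifest digest associated with a given tag can be resolved from `index.json`; the `jq` query below is just one illustrative way to do it, using the example image built earlier:
```
$ jq -r '.manifests[] | select(.annotations."org.opencontainers.image.ref.name" == "puzzlefs_example") | .digest' index.json
sha256:c9106994f5e18833e45164e2028431e9c822b4697172f8a997a0d9a3b0d26c9e
```
The returned digest names the blob under `blobs/sha256/` that holds the image manifest.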
304 | 305 | The Image Manifest looks like this: 306 | ``` 307 | $ cat blobs/sha256/c9106994f5e18833e45164e2028431e9c822b4697172f8a997a0d9a3b0d26c9e | jq 308 | { 309 | "config": { 310 | "data": "e30=", 311 | "digest": "sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a", 312 | "mediaType": "application/vnd.oci.empty.v1+json", 313 | "size": 2 314 | }, 315 | "layers": [ 316 | { 317 | "digest": "sha256:b7f1ee9373416a49835747455ec4d287bcccc5a4bf8c38156483d46b35ce4dbd", 318 | "mediaType": "application/vnd.puzzlefs.image.filedata.v1", 319 | "size": 27 320 | }, 321 | { 322 | "annotations": { 323 | "io.puzzlefsoci.puzzlefs.puzzlefs_verity_root_hash": "7b22d0210c16134159be75d8239d100817b451591d39af2031d94ae84ac4f8c7" 324 | }, 325 | "digest": "sha256:9e2edc6917b65606b1112ac8663665dfd2d945cfea960ca595accf790922b910", 326 | "mediaType": "application/vnd.puzzlefs.image.rootfs.v1", 327 | "size": 552 328 | } 329 | ], 330 | "schemaVersion": 2 331 | } 332 | ``` 333 | 334 | There are two types of layer descriptors: 335 | * `application/vnd.puzzlefs.image.rootfs.v1`: the PuzzleFS image rootfs which 336 | contains metadata in Capnproto format and must appear only once in the 337 | `layers` array 338 | * `application/vnd.puzzlefs.image.filedata.v1`: a PuzzleFS data chunk generated 339 | by the FastCDC algorithm; usually there are multiple chunks in an image and 340 | they contain all the filesystem data 341 | 342 | There is no extraction step for these layers, PuzzleFS mounts the filesystem by 343 | reading the PuzzleFS image rootfs and using this metadata to combine the data 344 | chunks back into the original files. In fact, the data chunks are part of the 345 | OCI Image Manifest so that the other tools copy the image correctly. For 346 | example, with skopeo: 347 | ``` 348 | $ skopeo --version 349 | skopeo version 1.15.2 350 | $ skopeo copy oci:/tmp/puzzlefs-image:puzzlefs_example oci:/tmp/copy-puzzlefs-image:puzzlefs_example 351 | ``` 352 | The information about the data chunks is also stored in the PuzzleFS image rootfs, 353 | so that PuzzleFS could mount the filesystem efficiently and that the PuzzleFS 354 | image could also be decoded in the kernel. 
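To locate the metadata blob decoded in the next step, the manifest's layers can be filtered by media type; this `jq` query is only an illustration, not something the puzzlefs tooling requires:
```
$ jq -r '.layers[] | select(.mediaType == "application/vnd.puzzlefs.image.rootfs.v1") | .digest' blobs/sha256/c9106994f5e18833e45164e2028431e9c822b4697172f8a997a0d9a3b0d26c9e
sha256:9e2edc6917b65606b1112ac8663665dfd2d945cfea960ca595accf790922b910
```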
355 | 356 | The `digest` of the PuzzleFS image rootfs contains the filesystem metadata and 357 | it can be decoded using the `capnp` tool and the capnp metadata schema (the 358 | following snippet assumes that you've cloned puzzlefs in `~/puzzlefs`): 359 | ``` 360 | $ capnp convert binary:json ~/puzzlefs/puzzlefs-lib/src/format/metadata.capnp Rootfs < blobs/sha256/9e2edc6917b65606b1112ac8663665dfd2d945cfea960ca595accf790922b910 361 | { "metadatas": [{"inodes": [ 362 | { "ino": "1", 363 | "mode": {"dir": { 364 | "entries": [ 365 | { "ino": "2", 366 | "name": [97, 108, 103, 111, 114, 105, 116, 104, 109, 115] }, 367 | { "ino": "3", 368 | "name": [108, 111, 114, 101, 109, 95, 105, 112, 115, 117, 109, 46, 116, 120, 116] } ], 369 | "lookBelow": false }}, 370 | "uid": 1000, 371 | "gid": 1000, 372 | "permissions": 493 }, 373 | { "ino": "2", 374 | "mode": {"dir": { 375 | "entries": [{ "ino": "4", 376 | "name": [98, 105, 110, 97, 114, 121, 45, 115, 101, 97, 114, 99, 104, 46, 116, 120, 116] }], 377 | "lookBelow": false }}, 378 | "uid": 1000, 379 | "gid": 1000, 380 | "permissions": 509 }, 381 | { "ino": "3", 382 | "mode": {"file": [{ "blob": { 383 | "digest": [183, 241, 238, 147, 115, 65, 106, 73, 131, 87, 71, 69, 94, 196, 210, 135, 188, 204, 197, 164, 191, 140, 56, 21, 100, 131, 212, 107, 53, 206, 77, 189], 384 | "offset": "0", 385 | "compressed": false }, 386 | "len": "27" }]}, 387 | "uid": 1000, 388 | "gid": 1000, 389 | "permissions": 436 }, 390 | {"ino": "4", "mode": {"file": []}, "uid": 1000, "gid": 1000, "permissions": 436} ]}], 391 | "fsVerityData": [{ "digest": [183, 241, 238, 147, 115, 65, 106, 73, 131, 87, 71, 69, 94, 196, 210, 135, 188, 204, 197, 164, 191, 140, 56, 21, 100, 131, 212, 107, 53, 206, 77, 189], 392 | "verity": [91, 20, 52, 173, 44, 8, 31, 244, 53, 178, 16, 121, 46, 144, 14, 39, 2, 30, 196, 43, 104, 230, 143, 98, 219, 173, 82, 223, 224, 201, 247, 164] }], 393 | "manifestVersion": "3" } 394 | ``` 395 | 396 | `metadatas` contains a list of PuzzleFS layers, each layer consisting of a 397 | vector of Inodes. See the [capnp 398 | schema](./puzzlefs-lib/src/format/metadata.capnp) for details. 399 | 400 | ## Implementation 401 | 402 | This workspace contains a library and an executable crate: 403 | * `puzzlefs-lib` is the library crate 404 | * `format` is the module for serializing/de-serializing the puzzlefs format 405 | * `builder` is the module for building a puzzlefs image 406 | * `extractor` is the module for extracting a puzzlefs image 407 | * `reader` is the module for fuse mounting a puzzlefs image 408 | * `exe/` is the executable frontend for the above 409 | 410 | ### Contributing 411 | 412 | Contributions need to pass all static analysis. 413 | 414 | In addition, all commits must include a `Signed-off-by:` line in their 415 | description. This indicates that you certify [the following statement, known as 416 | the Developer Certificate of Origin][dco]. You can automatically add this line 417 | to your commits by using `git commit -s --amend`. 418 | 419 | ``` 420 | Developer Certificate of Origin 421 | Version 1.1 422 | 423 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 424 | 1 Letterman Drive 425 | Suite D4700 426 | San Francisco, CA, 94129 427 | 428 | Everyone is permitted to copy and distribute verbatim copies of this 429 | license document, but changing it is not allowed. 
430 | 431 | 432 | Developer's Certificate of Origin 1.1 433 | 434 | By making a contribution to this project, I certify that: 435 | 436 | (a) The contribution was created in whole or in part by me and I 437 | have the right to submit it under the open source license 438 | indicated in the file; or 439 | 440 | (b) The contribution is based upon previous work that, to the best 441 | of my knowledge, is covered under an appropriate open source 442 | license and I have the right under that license to submit that 443 | work with modifications, whether created in whole or in part 444 | by me, under the same open source license (unless I am 445 | permitted to submit under a different license), as indicated 446 | in the file; or 447 | 448 | (c) The contribution was provided directly to me by some other 449 | person who certified (a), (b) or (c) and I have not modified 450 | it. 451 | 452 | (d) I understand and agree that this project and the contribution 453 | are public and that a record of the contribution (including all 454 | personal information I submit with it, including my sign-off) is 455 | maintained indefinitely and may be redistributed consistent with 456 | this project or the open source license(s) involved. 457 | ``` 458 | 459 | [dco]: https://developercertificate.org/ 460 | 461 | ### License 462 | 463 | puzzlefs is released under the [Apache License, Version 2.0](LICENSE), and is: 464 | 465 | Copyright (C) 2020-2021 Cisco Systems, Inc. 466 | -------------------------------------------------------------------------------- /doc/chunking.md: -------------------------------------------------------------------------------- 1 | ## Chunking a filesystem 2 | 3 | This should definitely be rabin fingerprint/rolling hash based. casync is the 4 | obvious prior art here. 5 | 6 | ## Defining the stream 7 | 8 | In order to do content defined chunking, we need to serialize the filesystem 9 | content into a stream. We can ignore everything besides regular files, since 10 | everything else will be captured in the metadata representation. Since the 11 | metadata representation is *not* stored inline with this stream, images built 12 | at slightly different times (resulting in different mtimes for config files in 13 | /etc) have a chance to share chunks. 14 | 15 | We serialize the filesystem by doing a breadth first walk, ordering directory 16 | entries lexicographically. We use a breadth first search so that hopefully 17 | package directories can be shared. For example, if one image has a bunch of 18 | stuff in `/etc/apt/sources.list.d` and another image has nothing there, 19 | hopefully using this ordering we'll have a chance at sharing the contents of 20 | `/etc`. This makes little difference for `/etc` since it only contains text 21 | files, but could make a bigger difference e.g. for stuff in `/lib`, e.g. when 22 | one image has a python package installed that the other does not. 23 | 24 | ## Rolling hash/Content Defined Chunking parameters 25 | 26 | There are two philosophies about this: 1. let images define their own 27 | parameters, so people can fine tune things for their particular image to get 28 | good results on update or 2. hard code these parameters in the spec, so 29 | everyone has to use the same algorithms and algorithm parameters. It seems (2) 30 | would potentially enable more sharing across images, since it's hard to see how 31 | anything (e.g. /etc/lsb-release should be mostly the same everywhere that's 32 | based on the same distro, but may not be shared if e.g. 
ngnix chooses different 33 | parameters than mysql). Additionally, (2) seems to be more in line with the 34 | "canonicalization" goal, so that different image builders would be required to 35 | choose the same parameters. 36 | 37 | However, we leave the choice of hash, parameters, etc. as an exercise to the 38 | reader :) 39 | -------------------------------------------------------------------------------- /doc/index.md: -------------------------------------------------------------------------------- 1 | # PuzzleFS format 2 | 3 | Puzzlefs consists of two parts: a metadata format for inode information, and 4 | actual filesystem data chunks, defined by various chunking algorithms. 5 | 6 | All enums are encoded as u32s; all encodings are little endian. 7 | 8 | All puzzlefs blobs are wrapped in the following structure: 9 | 10 | enum hash { 11 | sha256, 12 | } 13 | 14 | typedef hash_value byte[32] // for sha256 15 | 16 | enum blob_type { 17 | root, 18 | metadata, 19 | file, 20 | } 21 | 22 | type puzzlefs_blob { 23 | enum hash; 24 | u64 references_len; 25 | hash_value references[]; 26 | blob_type type; 27 | // followed by the actual blob 28 | } 29 | 30 | ## Metadata 31 | 32 | See metadata.md 33 | 34 | ## Chunking 35 | 36 | See chunking.md 37 | -------------------------------------------------------------------------------- /doc/metadata.md: -------------------------------------------------------------------------------- 1 | ## PuzzleFS filesystem metadata 2 | 3 | // struct rootfs is the entry point for the filesystem metadata. it has a 4 | // list of metadata objects, the 0th being the "highest" in the stack. 5 | // 6 | // this list of metadatas can either be included at offsets in the rootfs 7 | // file, or referenced as other blobs. 8 | struct rootfs { 9 | u64 metadata_count; 10 | metadata_ref metadatas[]; 11 | } 12 | 13 | enum metadata_type { 14 | local, 15 | blob, 16 | }; 17 | struct metadata_ref { 18 | metadata_type type; 19 | hash_value blob; 20 | u64 offset; 21 | } 22 | 23 | struct metadata { 24 | u64 inode_count; 25 | struct inode inodes[]; 26 | } 27 | 28 | // defined in dirent.h 29 | enum inode_type { 30 | unknown, 31 | fifo, 32 | chr, 33 | dir, 34 | blk, 35 | reg, 36 | lnk, 37 | sock, 38 | /* 39 | * bsd style whiteout, mostly unused in linux. maybe we can teach 40 | * overlay about these? 41 | */ 42 | wht, 43 | } 44 | 45 | struct string { 46 | u64 len; 47 | char val[]; 48 | }; 49 | 50 | struct dirent { 51 | u64 ino; 52 | string name; 53 | } 54 | 55 | // when set, also look at layers below for the dir list. when not set, this 56 | // dir list is complete. note that dirlists allow wht inode_types to white out 57 | // entries below them. 58 | #define DIR_LIST_LOOK_BELOW 1 59 | 60 | struct dir_list { 61 | u64 flags; 62 | u64 entries_len; 63 | dirent entries[]; 64 | } 65 | 66 | struct chunk { 67 | metadata_ref chunk; 68 | u64 file_offset; 69 | u64 len; 70 | } 71 | 72 | struct chunk_list { 73 | u64 chunks_len; 74 | chunk chunks[]; 75 | } 76 | 77 | // this must be a fixed size so we can binary search over it easily 78 | struct inode { 79 | u64 ino; 80 | inode_type type; 81 | u32 uid; 82 | u32 gid; 83 | u16 mode; 84 | u64 mtime, atime, ctime; /* seems like we should require these? */ 85 | union { 86 | // fifo 87 | struct { 88 | /* nothing additional about fifos */ 89 | }, 90 | 91 | // chr 92 | struct { 93 | dev_t major; 94 | dev_t minor; 95 | }, 96 | 97 | // dir 98 | struct { 99 | u64 dir_offset; 100 | }, 101 | 102 | // blk; do we even want these? 
seems like maybe not since they're 103 | // system specific. 104 | struct { 105 | dev_t major; 106 | dev_t minor; 107 | }, 108 | 109 | // reg 110 | struct { 111 | u64 file_size; /* total file size */ 112 | u64 file_offset; 113 | }, 114 | 115 | // lnk 116 | struct { 117 | string target[PATH_MAX]; 118 | #define LNK_HARD 1 119 | #define LNK_SOFT 2 120 | u32 flags; 121 | }, 122 | 123 | // sock; this seems like it should probably also be ignored? 124 | struct { 125 | /* no extra info */ 126 | }, 127 | 128 | // wht, unused for now 129 | struct { 130 | /* no extra info */ 131 | }, 132 | }; 133 | 134 | metadata_ref inode_addl; /* ref to the additional inode metadata */ 135 | } 136 | 137 | /* 138 | * additional inode metadata; here's where we put everything that's not 139 | * fixed length so it doesn't screw up our binary search for inodes. 140 | */ 141 | struct xattr { 142 | u64 key_len; 143 | char key[]; 144 | u64 value_len; 145 | char value[]; 146 | } 147 | struct inode_addl { 148 | u64 xattrs_len; 149 | xattr xattrs; 150 | string lnk_target; 151 | }; 152 | 153 | ### Reading compressed data at an offset 154 | 155 | This is called "random access" in the compression format community. Mainly, 156 | people suggest breaking things up into smaller chunks and compressing the 157 | chunks individually (this would be after and independent of the chunking above, 158 | which is likely to be some kind of Rabin fingerprinting style chunking). 159 | 160 | It looks like zstd has some experimental out of tree support for it: 161 | 162 | https://github.com/facebook/zstd/issues/395 163 | https://github.com/facebook/zstd/tree/dev/contrib/seekable_format 164 | 165 | So we should probably play around with that. Alternatively, we could add some 166 | kind of wrapper in the specification about this for arbitrary compression 167 | formats. But it's likely that the compression people themselves will implement 168 | a better version of this, so it seems like we should try to use theirs first. 169 | (Maybe not; it looks like zstd just chunks things up in a hard coded chunk 170 | size, same as we would potentially do. Additionally, there are some proposals 171 | out there for [storing other kinds of 172 | metadata](https://github.com/containers/storage/pull/775) in the zstd skippable 173 | frames, same as the `seekable_format` code does above, and that code currently 174 | dies if it encounters skippable frames that it doesn't understand. So perhaps 175 | we should just design our own seekable container format.) 176 | 177 | In any case, for now this document assumes this is possible without specifying 178 | how it is done. 179 | 180 | ### Algorithm for generating a delta on top of an existing layer 181 | 182 | given some new set of files S: 183 | 184 | create a .catar of S 185 | chunks = run chunking algorithm on S 186 | generate metadata for S 187 | 188 | should we do some special handling for overlay -> bsd style whiteouts? since 189 | we'll have direct mount support, maybe this is a "good time" to change the 190 | convention, since the kernel can just interpret the thing for us correctly. 
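For reference, the current Rust implementation drives this delta step through `builder::add_rootfs_delta` in `puzzlefs-lib`, which the CLI exposes as `puzzlefs build --base-layer`. A sketch of the invocation (the paths and tags here are made up for illustration):

    # /tmp/new-rootfs is the new set of files S; the OCI directory
    # /tmp/puzzlefs-image already contains the tag "base" to build on top of,
    # and the resulting delta layer is tagged "delta".
    puzzlefs build --base-layer base /tmp/new-rootfs /tmp/puzzlefs-image:delta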
191 | 192 | ### Algorithm for finding inode n 193 | 194 | Given a target inode `ino`: 195 | 196 | for each metadata_ref: 197 | i = binary_search(metadata_ref->inodes, ino) 198 | if i: 199 | return i 200 | 201 | ### Algorithm for looking up a dirlist 202 | 203 | Given a target inode `ino`: 204 | 205 | dirlist = [] 206 | for each metadata_ref: 207 | i = binary_search(metadata_ref->inodes, ino) 208 | if not i: 209 | continue // not every layer has to change every directory 210 | dl = resolve_dirlist_at_offset(i, metadata_ref) 211 | append_respecting_whiteouts(dirlist, dl) 212 | if !(dl->flags & DIR_LIST_LOOK_BELOW) 213 | break 214 | 215 | ### Algorithm for looking up file contents 216 | 217 | Given a target inode `ino`: 218 | 219 | 220 | struct chunk_info { 221 | chunk *chunk; 222 | u64 offset_in_chunk; 223 | u64 len; 224 | } 225 | 226 | inode_chunks = xarray_new() 227 | size = binary_search(metadata_refs[0], ino).size 228 | 229 | def have_all_chunks(xa): 230 | max_seen = 0 231 | xa_for_each_range(xa, ent) 232 | // make sure that we have [0, size) populated 233 | // i.e. max_seen should == ent.min; max_seen = ent.max 234 | 235 | for each metadata_ref: 236 | if have_all_chunks(inode_chunks): 237 | break 238 | 239 | // add the inode chunks here 240 | i = binary_search(metadata_ref->inodes, ino) 241 | if not i: 242 | fail("incomplete inode definition") 243 | 244 | def add_chunk(xa, chunk): 245 | while 1: 246 | existing = xa_find(xa, chunk.chunk_offset, chunk.chunk_offset+chunk.len) 247 | if not existing: 248 | xa_store_range(xa, chunk, chunk.file_offset, chunk.file_offset+len) 249 | return 250 | 251 | // does the existing chunk cover the whole range? if so split 252 | // it and insert ours 253 | if existing.offset + existing.len > chunk.max: 254 | xa_store_range(xa, existing, existing.offset, existing.offset - chunk.offset) 255 | xa_store_range(xa, chunk, chunk.offset, chunk.len) 256 | xa_store_range(xa, existing, existing.offset+chunk.len, existing.offset + existing.len - chunk.len) 257 | return 258 | 259 | // special cases where it only covers the left or right half of 260 | // the range, handle as above 261 | 262 | 263 | // otherwise, remove the whole chunk, as our new chunk subsumes 264 | // it, and keep iterating 265 | xa_remove(xa, existing) 266 | 267 | for chunk in i.chunks: 268 | 269 | // uh oh, some existing chunk overlaps with our range. let's 270 | find_file_offsets(inode_chunks, chunk) 271 | xa_store_range(chunk, chunk.file_offset, chunk.file_offset+len) 272 | 273 | Now, when someone causes a fault at offset `off`: 274 | 275 | chunk_info = xa_load(inode_chunks, off) 276 | read_from_chunk(chunk, chunk_info.offeset_in_chunk + chunk.file_offset) 277 | /* 278 | * in reality this is a little more complicated, because you could ask for 279 | * more than a single chunk, but in that case you just xa_load(inode_chunks, 280 | * off+num_read) 281 | */ 282 | 283 | 284 | ### Canonicalization 285 | 286 | While the above metadata can be additive (i.e., it is explicit that puzzlefs 287 | ignores metadata in lower metadata files for indoes whose metadata is present 288 | in files above), the canonical representation of metadata for a puzzlefs 289 | filesystem is as one single metadata layer (and some set of chunks for this 290 | single filesystem represented by the chunking algorithm), i.e. everything is 291 | included in the current file. 
292 | 293 | This means that the only thing left to do is define the ordering of things, and 294 | the ordering should be the "sensible" order for objects: dirents are stored in 295 | lexicographic order, inodes are stored by inode number, etc. 296 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _site/ 2 | .sass-cache/ 3 | .jekyll-cache/ 4 | .jekyll-metadata 5 | # Ignore folders generated by Bundler 6 | .bundle/ 7 | vendor/ 8 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: minima 2 | title: PuzzleFS, a next-generation container filesystem 3 | author: Ariel Miculas 4 | description: PuzzleFS blog 5 | -------------------------------------------------------------------------------- /docs/_posts/2023-10-30-Linux-kernel-setup.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Linux kernel setup for the PuzzleFS driver" 3 | date: 2023-10-30 4 | --- 5 | The setup is based on [Wedson's 6 | tutorial](https://www.youtube.com/watch?v=tPs1uRqOnlk). This document describes 7 | the necessary steps to build an initrd and run a custom kernel under qemu. This 8 | custom kernel includes the patches for the PuzzleFS driver. 9 | 10 | # Prerequisites 11 | Install the necessary tools for building the Linux kernel: 12 | 13 | * For Ubuntu, you can find a list [here](https://wiki.ubuntu.com/Kernel/BuildYourOwnKernel#Build_Environment) 14 | * For Fedora, you can find a list [here](https://docs.fedoraproject.org/en-US/quick-docs/kernel-build-custom/#_get_the_dependencies) 15 | * For Arch Linux, you can find a list [here](https://wiki.archlinux.org/title/Kernel/Traditional_compilation#Install_the_core_packages) 16 | 17 | Install [qemu](https://wiki.qemu.org/Main_Page). 18 | 19 | # Steps 20 | 21 | 1. Get the [PuzzleFS branch](https://github.com/ariel-miculas/linux/tree/puzzlefs) 22 | ``` 23 | git clone https://github.com/ariel-miculas/linux.git --branch puzzlefs 24 | ``` 25 | We'll call this path `KERNEL_PATH`. 26 | 27 | 2. Follow the [rust quickstart guide](https://docs.kernel.org/rust/quick-start.html) 28 | 29 | 3. Make sure `make LLVM=1 rustavailable` is successful 30 | 31 | This is especially important because `CONFIG_RUST=y` will be silently 32 | ignored if the rust toolchain is not available. 33 | 34 | 4. Configure and build the kernel 35 | ``` 36 | $ make LLVM=1 allnoconfig qemu-busybox-min.config puzzlefs.config 37 | $ make LLVM=1 -j$(nproc) 38 | ``` 39 | 40 | 5. Setup busybox 41 | ``` 42 | git clone git://git.busybox.net/busybox 43 | cd busybox 44 | make menuconfig # enable 'Build static binary' config 45 | make 46 | make install 47 | ``` 48 | This will create the `_install` directory with the rootfs inside it. We'll 49 | call the busybox path `BUSYBOX_PATH`. 50 | 51 | 6. Create a home directory in the rootfs and build a puzzlefs image inside 52 | (`$BUSYBOX_PATH/_install/home/puzzlefs_oci`) 53 | 54 | To build a puzzlefs image: 55 | * install puzzlefs using cargo: `cargo install puzzlefs` or clone the 56 | [puzzlefs repository](https://github.com/project-machine/puzzlefs) and 57 | run `make release` 58 | 59 | * create a simple filesystem structure with a few directories and files 60 | (e.g. 
in `/tmp/simple_rootfs`) 61 | ``` 62 | $ tree simple_rootfs 63 | simple_rootfs 64 | ├── dir-1 65 | ├── dir-2 66 | ├── dir-3 67 | ├── dir-4 68 | ├── file1 69 | └── file2 70 | 71 | 5 directories, 2 files 72 | ``` 73 | 74 | * build a puzzlefs oci image at 75 | `$BUSYBOX_PATH/_install/home/puzzlefs_oci` with the tag `first_try`: 76 | 77 | ``` 78 | $ puzzlefs build /tmp/simple_rootfs \ 79 | $BUSYBOX_PATH/_install/home/puzzlefs_oci first_try 80 | ``` 81 | 82 | * get `first_try`'s image manifest from `puzzlefs_oci/index.json` 83 | 84 | ``` 85 | $ jq ".manifests[] | .digest" index.json 86 | "sha256:c43e5ab9d0cee1dcfbf442d18023b34410de3deb0f6dbffcec72732b6830db09" 87 | ``` 88 | 89 | 7. Add the following `init` script in the busybox rootfs (defaults to `$BUSYBOX_PATH/_install`): 90 | 91 | ``` 92 | #!/bin/sh 93 | mount -t devtmpfs none /dev 94 | mkdir -p /proc 95 | mount -t proc none /proc 96 | 97 | ifconfig lo up 98 | udhcpc -i eth0 99 | 100 | mkdir /mnt 101 | mount -t puzzlefs -o oci_root_dir="/home/puzzlefs_oci" -o \ 102 | image_manifest="c43e5ab9d0cee1dcfbf442d18023b34410de3deb0f6dbffcec72732b6830db09" \ 103 | none /mnt 104 | 105 | setsid sh -c 'exec sh -l /dev/ttyS0 2>&1' 106 | ``` 107 | Make sure to replace the `image_manifest` with your own digest. This 108 | init script will be passed to rdinit in the kernel command line. 109 | 110 | 8. Generate the initramfs 111 | 112 | ``` 113 | cd $BUSYBOX_PATH/_install && find . | cpio -H newc -o | gzip > ../ramdisk.img 114 | ``` 115 | This will generate a compressed ramdisk image in 116 | `$BUSYBOX_PATH/ramdisk.img`. 117 | 118 | 9. Run with qemu: 119 | ``` 120 | qemu-system-x86_64 \ 121 | -accel kvm \ 122 | -cpu host \ 123 | -m 4G \ 124 | -initrd $BUSYBOX_PATH/ramdisk.img \ 125 | -kernel $KERNEL_PATH/arch/x86/boot/bzImage \ 126 | -nographic \ 127 | -append 'console=ttyS0 nokaslr debug rdinit=/init' \ 128 | -nic user,model=rtl8139 \ 129 | -no-reboot 130 | ``` 131 | 132 | 10. Check whether puzzlefs has been successfully mounted: 133 | ``` 134 | ~ # grep puzzlefs /proc/filesystems 135 | nodev puzzlefs 136 | ~ # mount | grep puzzlefs 137 | none on /mnt type puzzlefs (rw,relatime) 138 | ~ # ls /mnt/ 139 | dir-1 dir-2 dir-3 dir-4 file1 file2 140 | ``` 141 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Welcome to PuzzleFS 3 | --- 4 | Resources: 5 | * [Github](https://github.com/project-machine/puzzlefs) 6 | * [Kernel driver](https://rust-for-linux.com/puzzlefs-filesystem-driver) 7 | -------------------------------------------------------------------------------- /exe/.gitignore: -------------------------------------------------------------------------------- 1 | oci 2 | ubuntu 3 | -------------------------------------------------------------------------------- /exe/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "puzzlefs" 3 | version = "0.2.0" 4 | authors = ["Tycho Andersen ", "Ariel Miculas "] 5 | description = """ 6 | PuzzleFS is a next-generation container filesystem. 
7 | """ 8 | documentation = "https://github.com/project-machine/puzzlefs" 9 | homepage = "https://github.com/project-machine/puzzlefs" 10 | repository = "https://github.com/project-machine/puzzlefs" 11 | keywords = ["fuse", "filesystem", "container", "cdc"] 12 | categories = ["filesystem"] 13 | license = "Apache-2.0" 14 | edition = "2021" 15 | 16 | [dependencies] 17 | anyhow = "1.0.75" 18 | nix = {version = "0.27.1", features = ["mount"] } 19 | clap = { version = "4.0.18", features = ["derive"] } 20 | # Version 0.5 drops exit_action so we're stuck with 0.4 21 | daemonize = "0.4.1" 22 | ctrlc = "3.2.0" 23 | log = "0.4.17" 24 | env_logger = "0.9.3" 25 | syslog = "6.0.1" 26 | os_pipe = "1.1.2" 27 | puzzlefs-lib = { path = "../puzzlefs-lib", version = "0.2.0" } 28 | hex = "0.4.3" 29 | libmount = "0.1.15" 30 | 31 | [dev-dependencies] 32 | assert_cmd = "2.0.12" 33 | dir-diff = "0.3.2" 34 | tempfile = "3.10" 35 | walkdir = "2" 36 | -------------------------------------------------------------------------------- /exe/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::{Args, Parser, Subcommand}; 2 | use daemonize::Daemonize; 3 | use env_logger::Env; 4 | use libmount::mountinfo; 5 | use libmount::Overlay; 6 | use log::{error, info, LevelFilter}; 7 | use nix::mount::umount; 8 | use nix::unistd::Uid; 9 | use os_pipe::{PipeReader, PipeWriter}; 10 | use puzzlefs_lib::{ 11 | builder::{add_rootfs_delta, build_initial_rootfs, enable_fs_verity}, 12 | compression::{Noop, Zstd}, 13 | extractor::extract_rootfs, 14 | fsverity_helpers::get_fs_verity_digest, 15 | oci::Image, 16 | reader::{fuse::PipeDescriptor, mount, spawn_mount}, 17 | }; 18 | use std::ffi::{OsStr, OsString}; 19 | use std::fs; 20 | use std::fs::OpenOptions; 21 | use std::io::prelude::*; 22 | use std::path::{Path, PathBuf}; 23 | use std::process::exit; 24 | use std::sync::Arc; 25 | use syslog::{BasicLogger, Facility, Formatter3164}; 26 | 27 | #[derive(Parser)] 28 | #[command(author, version, about)] 29 | struct Opts { 30 | #[command(subcommand)] 31 | subcmd: SubCommand, 32 | } 33 | 34 | #[derive(Subcommand)] 35 | enum SubCommand { 36 | Build(Build), 37 | Mount(Mount), 38 | Umount(Umount), 39 | Extract(Extract), 40 | EnableFsVerity(FsVerity), 41 | } 42 | 43 | #[derive(Args)] 44 | struct Build { 45 | rootfs: String, 46 | oci_dir: String, 47 | #[arg(short, long, value_name = "base-layer")] 48 | base_layer: Option, 49 | #[arg(short, long, value_name = "compressed")] 50 | compression: bool, 51 | } 52 | 53 | #[derive(Args)] 54 | struct Mount { 55 | oci_dir: String, 56 | mountpoint: String, 57 | #[arg(short, long)] 58 | foreground: bool, 59 | #[arg(short, long, value_name = "init-pipe")] 60 | init_pipe: Option, 61 | #[arg(short, value_delimiter = ',')] 62 | options: Option>, 63 | #[arg(short, long, value_name = "fs verity root digest")] 64 | digest: Option, 65 | #[arg(short, long, conflicts_with = "foreground")] 66 | writable: bool, 67 | #[arg(short, long, conflicts_with = "foreground")] 68 | persist: Option, 69 | } 70 | 71 | #[derive(Args)] 72 | struct Umount { 73 | mountpoint: String, 74 | } 75 | 76 | #[derive(Args)] 77 | struct Extract { 78 | oci_dir: String, 79 | extract_dir: String, 80 | } 81 | 82 | #[derive(Args)] 83 | struct FsVerity { 84 | oci_dir: String, 85 | root_hash: String, 86 | } 87 | 88 | // set default log level when RUST_LOG environment variable is not set 89 | fn init_logging(log_level: &str) { 90 | 
env_logger::Builder::from_env(Env::default().default_filter_or(log_level)).init(); 91 | } 92 | 93 | fn init_syslog(log_level: &str) -> std::io::Result<()> { 94 | let formatter = Formatter3164 { 95 | facility: Facility::LOG_USER, 96 | hostname: None, 97 | process: "puzzlefs".into(), 98 | pid: 0, 99 | }; 100 | 101 | let logger = match syslog::unix(formatter) { 102 | Err(e) => { 103 | println!("impossible to connect to syslog: {e:?}"); 104 | return Err(std::io::Error::last_os_error()); 105 | } 106 | Ok(logger) => logger, 107 | }; 108 | log::set_boxed_logger(Box::new(BasicLogger::new(logger))) 109 | .map(|()| { 110 | log::set_max_level(match log_level { 111 | "off" => LevelFilter::Off, 112 | "error" => LevelFilter::Error, 113 | "warn" => LevelFilter::Warn, 114 | "info" => LevelFilter::Info, 115 | "debug" => LevelFilter::Debug, 116 | "trace" => LevelFilter::Trace, 117 | _ => panic!("unexpected log level"), 118 | }) 119 | }) 120 | .unwrap(); 121 | Ok(()) 122 | } 123 | 124 | #[allow(clippy::too_many_arguments)] 125 | fn mount_background( 126 | image: Image, 127 | tag: &str, 128 | mountpoint: &Path, 129 | options: Option>, 130 | manifest_verity: Option>, 131 | mut recv: PipeReader, 132 | init_notify: &PipeWriter, 133 | parent_action: impl FnOnce() -> anyhow::Result<()> + 'static, 134 | ) -> anyhow::Result<()> { 135 | let daemonize = Daemonize::new().exit_action(move || { 136 | let mut read_buffer = [0]; 137 | if let Err(e) = recv.read_exact(&mut read_buffer) { 138 | info!("error reading from pipe {e}") 139 | } else if read_buffer[0] == b'f' { 140 | // in case of failure, 'f' is written into the pipe 141 | // we explicitly exit with an error code, otherwise exit(0) is done by daemonize 142 | exit(1); 143 | } 144 | if let Err(e) = parent_action() { 145 | error!("parent_action error {e}"); 146 | } 147 | }); 148 | 149 | match daemonize.start() { 150 | Ok(_) => { 151 | mount( 152 | image, 153 | tag, 154 | mountpoint, 155 | &options.unwrap_or_default()[..], 156 | Some(PipeDescriptor::UnnamedPipe(init_notify.try_clone()?)), 157 | manifest_verity.as_deref(), 158 | )?; 159 | } 160 | Err(e) => { 161 | return Err(e.into()); 162 | } 163 | }; 164 | Ok(()) 165 | } 166 | 167 | fn parse_oci_dir(oci_dir: &str) -> anyhow::Result<(&str, &str)> { 168 | let components: Vec<&str> = oci_dir.split_terminator(":").collect(); 169 | if components.len() != 2 { 170 | anyhow::bail!("Expected oci_dir in the following format : ") 171 | } 172 | 173 | Ok((components[0], components[1])) 174 | } 175 | 176 | fn get_mount_type(mountpoint: &str) -> anyhow::Result { 177 | let contents = fs::read_to_string("/proc/self/mountinfo")?; 178 | let mut parser = mountinfo::Parser::new(contents.as_bytes()); 179 | let mount_info = parser.find(|mount_info| { 180 | mount_info 181 | .as_ref() 182 | .map(|mount_info| mount_info.mount_point == OsStr::new(mountpoint)) 183 | .unwrap_or(false) 184 | }); 185 | let mount_info = mount_info 186 | .ok_or_else(|| anyhow::anyhow!("cannot find mountpoint in /proc/self/mountpoints"))??; 187 | Ok(mount_info.fstype.into_owned()) 188 | } 189 | 190 | fn main() -> anyhow::Result<()> { 191 | let opts: Opts = Opts::parse(); 192 | match opts.subcmd { 193 | SubCommand::Build(b) => { 194 | let rootfs = Path::new(&b.rootfs); 195 | let (oci_dir, tag) = parse_oci_dir(&b.oci_dir)?; 196 | let oci_dir = Path::new(oci_dir); 197 | let image = Image::new(oci_dir)?; 198 | let new_image = match b.base_layer { 199 | Some(base_layer) => { 200 | let (_desc, image) = if b.compression { 201 | add_rootfs_delta::(rootfs, image, tag, 
&base_layer)? 202 | } else { 203 | add_rootfs_delta::(rootfs, image, tag, &base_layer)? 204 | }; 205 | image 206 | } 207 | None => { 208 | if b.compression { 209 | build_initial_rootfs::(rootfs, &image, tag)? 210 | } else { 211 | build_initial_rootfs::(rootfs, &image, tag)? 212 | }; 213 | Arc::new(image) 214 | } 215 | }; 216 | let mut manifest_fd = new_image.get_image_manifest_fd(tag)?; 217 | let mut read_buffer = Vec::new(); 218 | manifest_fd.read_to_end(&mut read_buffer)?; 219 | let manifest_digest = get_fs_verity_digest(&read_buffer)?; 220 | println!( 221 | "puzzlefs image manifest digest: {}", 222 | hex::encode(manifest_digest) 223 | ); 224 | Ok(()) 225 | } 226 | SubCommand::Mount(m) => { 227 | let log_level = "info"; 228 | if m.foreground { 229 | init_logging(log_level); 230 | } else { 231 | init_syslog(log_level)?; 232 | } 233 | 234 | if (m.writable || m.persist.is_some()) && !Uid::effective().is_root() { 235 | anyhow::bail!("Writable mounts can only be created by the root user!") 236 | } 237 | 238 | let (oci_dir, tag) = parse_oci_dir(&m.oci_dir)?; 239 | let oci_dir = Path::new(oci_dir); 240 | let oci_dir = fs::canonicalize(oci_dir)?; 241 | let image = Image::open(&oci_dir)?; 242 | let mountpoint = Path::new(&m.mountpoint); 243 | let mountpoint = fs::canonicalize(mountpoint)?; 244 | 245 | let manifest_verity = m.digest.map(hex::decode).transpose()?; 246 | 247 | if m.writable || m.persist.is_some() { 248 | // We only support background mounts with the writable|persist flag 249 | let (recv, mut init_notify) = os_pipe::pipe()?; 250 | let pfs_mountpoint = mountpoint.join("ro"); 251 | fs::create_dir_all(&pfs_mountpoint)?; 252 | 253 | if let Err(e) = mount_background( 254 | image, 255 | tag, 256 | &pfs_mountpoint.clone(), 257 | m.options, 258 | manifest_verity, 259 | recv, 260 | &init_notify, 261 | move || { 262 | let ovl_workdir = mountpoint.join("work"); 263 | fs::create_dir_all(&ovl_workdir)?; 264 | let ovl_upperdir = match m.persist { 265 | None => mountpoint.join("upper"), 266 | Some(upperdir) => Path::new(&upperdir).to_path_buf(), 267 | }; 268 | fs::create_dir_all(&ovl_upperdir)?; 269 | let overlay = Overlay::writable( 270 | [pfs_mountpoint.as_path()].into_iter(), 271 | ovl_upperdir, 272 | ovl_workdir, 273 | &mountpoint, 274 | ); 275 | overlay.mount().map_err(|e| anyhow::anyhow!("{e}")) 276 | }, 277 | ) { 278 | if let Err(e) = init_notify.write_all(b"f") { 279 | error!("puzzlefs will hang because we couldn't write to pipe, {e}"); 280 | } 281 | error!("mount_background failed: {e}"); 282 | return Err(e); 283 | } 284 | return Ok(()); 285 | } 286 | 287 | if m.foreground { 288 | let (send, recv) = std::sync::mpsc::channel(); 289 | let send_ctrlc = send.clone(); 290 | 291 | ctrlc::set_handler(move || { 292 | println!("puzzlefs unmounted"); 293 | send_ctrlc.send(()).unwrap(); 294 | }) 295 | .unwrap(); 296 | 297 | let fuse_thread_finished = send; 298 | let named_pipe = m.init_pipe.map(PathBuf::from); 299 | let result = spawn_mount( 300 | image, 301 | tag, 302 | &mountpoint, 303 | &m.options.unwrap_or_default(), 304 | named_pipe.clone().map(PipeDescriptor::NamedPipe), 305 | Some(fuse_thread_finished), 306 | manifest_verity.as_deref(), 307 | ); 308 | if let Err(e) = result { 309 | if let Some(pipe) = named_pipe { 310 | let file = OpenOptions::new().write(true).open(&pipe); 311 | match file { 312 | Ok(mut file) => { 313 | if let Err(e) = file.write_all(b"f") { 314 | error!("cannot write to pipe {}, {e}", pipe.display()); 315 | } 316 | } 317 | Err(e) => { 318 | error!("cannot open pipe {}, 
{e}", pipe.display()); 319 | } 320 | } 321 | } 322 | return Err(e.into()); 323 | } 324 | 325 | // This blocks until either ctrl-c is pressed or the filesystem is unmounted 326 | let () = recv.recv().unwrap(); 327 | } else { 328 | let (recv, mut init_notify) = os_pipe::pipe()?; 329 | 330 | if let Err(e) = mount_background( 331 | image, 332 | tag, 333 | &mountpoint, 334 | m.options, 335 | manifest_verity, 336 | recv, 337 | &init_notify, 338 | || Ok(()), 339 | ) { 340 | if let Err(e) = init_notify.write_all(b"f") { 341 | error!("puzzlefs will hang because we couldn't write to pipe, {e}"); 342 | } 343 | error!("mount_background failed: {e}"); 344 | return Err(e); 345 | } 346 | } 347 | 348 | Ok(()) 349 | } 350 | SubCommand::Umount(e) => { 351 | let mountpoint = Path::new(&e.mountpoint); 352 | let mount_type = get_mount_type(&e.mountpoint)?; 353 | match mount_type.to_str() { 354 | Some("overlay") => { 355 | if !Uid::effective().is_root() { 356 | anyhow::bail!("Overlay mounts can only be unmounted by the root user!") 357 | } 358 | umount(mountpoint)?; 359 | // Now unmount the read-only puzzlefs mountpoint 360 | let pfs_mountpoint = mountpoint.join("ro"); 361 | umount(pfs_mountpoint.as_os_str())?; 362 | // TODO: Decide whether to remove the directories we've created. For the LXC 363 | // case, we don't want to remove them because we want to persist state between 364 | // multiple mounts. Should we add a --delete flag to unmount? 365 | // let ovl_workdir = mountpoint.join("work"); 366 | // let ovl_upperdir = mountpoint.join("upper"); 367 | // std::fs::remove_dir_all(&pfs_mountpoint)?; 368 | // std::fs::remove_dir_all(&ovl_workdir)?; 369 | // std::fs::remove_dir_all(&ovl_upperdir)?; 370 | return Ok(()); 371 | } 372 | Some("fuse") => { 373 | // We call "fusermount -u" because we don't have permissions to umount directly 374 | // fusermount and umount binaries have the setuid bit set 375 | let status = std::process::Command::new("fusermount") 376 | .arg("-u") 377 | .arg(&e.mountpoint) 378 | .status()?; 379 | if !status.success() { 380 | anyhow::bail!( 381 | "umount exited with status {}", 382 | status 383 | .code() 384 | .map(|code| code.to_string()) 385 | .unwrap_or("terminated by signal".to_string()) 386 | ); 387 | } 388 | } 389 | _ => anyhow::bail!( 390 | "Unknown mountpoint type {} for {}", 391 | mount_type.to_str().unwrap_or("unknown mount type"), 392 | &e.mountpoint 393 | ), 394 | } 395 | 396 | Ok(()) 397 | } 398 | SubCommand::Extract(e) => { 399 | let (oci_dir, tag) = parse_oci_dir(&e.oci_dir)?; 400 | init_logging("info"); 401 | extract_rootfs(oci_dir, tag, &e.extract_dir) 402 | } 403 | SubCommand::EnableFsVerity(v) => { 404 | let (oci_dir, tag) = parse_oci_dir(&v.oci_dir)?; 405 | let oci_dir = Path::new(oci_dir); 406 | let oci_dir = fs::canonicalize(oci_dir)?; 407 | let image = Image::open(&oci_dir)?; 408 | enable_fs_verity(image, tag, &v.root_hash)?; 409 | Ok(()) 410 | } 411 | } 412 | } 413 | -------------------------------------------------------------------------------- /exe/tests/cleanup_verity_device.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | MOUNTPOINT=$1 4 | BLOCKDEV=$2 5 | BACKING_FILE=$3 6 | 7 | [ -z "$MOUNTPOINT" ] && exit 1 8 | [ -z "$BLOCKDEV" ] && exit 1 9 | [ -z "$BACKING_FILE" ] && exit 1 10 | 11 | echo "unmounting: $MOUNTPOINT, deleting blockdev $BLOCKDEV and backing file $BACKING_FILE" 12 | sudo umount "$MOUNTPOINT" 13 | sudo losetup -d "$BLOCKDEV" 14 | rm "$BACKING_FILE" 15 | 
-------------------------------------------------------------------------------- /exe/tests/extract.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::OsStr; 2 | use tempfile::tempdir; 3 | 4 | // see https://github.com/rust-lang/rust/issues/46379#issuecomment-548787629 5 | pub mod helpers; 6 | use helpers::{get_image, puzzlefs}; 7 | 8 | #[test] 9 | fn build_and_extract_is_noop() -> anyhow::Result<()> { 10 | let dir = tempdir().unwrap(); 11 | let ubuntu = dir.path().join("ubuntu"); 12 | let ubuntu_rootfs = get_image(ubuntu)?; 13 | 14 | // TODO: figure out a better way to do all this osstr stuff... 15 | let oci = dir.path().join("oci"); 16 | let mut oci_arg = oci.into_os_string(); 17 | oci_arg.push(OsStr::new(":test")); 18 | puzzlefs([ 19 | OsStr::new("build"), 20 | ubuntu_rootfs.as_ref(), 21 | oci_arg.as_ref(), 22 | ])?; 23 | 24 | let extracted = dir.path().join("extracted"); 25 | puzzlefs([ 26 | OsStr::new("extract"), 27 | oci_arg.as_os_str(), 28 | extracted.as_os_str(), 29 | ])?; 30 | assert!(!dir_diff::is_different(ubuntu_rootfs, extracted).unwrap()); 31 | Ok(()) 32 | } 33 | -------------------------------------------------------------------------------- /exe/tests/helpers/mod.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::OsStr; 2 | use std::fs; 3 | use std::path::Path; 4 | use std::path::PathBuf; 5 | use std::process::Command; 6 | use std::str; 7 | 8 | use anyhow::bail; 9 | use assert_cmd::cargo::CommandCargoExt; 10 | use std::env; 11 | 12 | pub fn get_image>(to_dir: P) -> anyhow::Result { 13 | let image = "ubuntu"; 14 | let tag = "latest"; 15 | if to_dir.as_ref().exists() { 16 | let rootfs = to_dir.as_ref().join("rootfs"); 17 | if rootfs.exists() { 18 | return Ok(rootfs); 19 | } else { 20 | bail!( 21 | "{:?} exists but does not have a rootfs directory in it", 22 | to_dir.as_ref().display() 23 | ); 24 | } 25 | } 26 | 27 | let mut xdg_data_home_default = env::var("HOME").unwrap(); 28 | xdg_data_home_default.push_str("/.local/share"); 29 | 30 | let mut xdg_data_home = env::var("XDG_DATA_HOME").unwrap_or(xdg_data_home_default.clone()); 31 | if xdg_data_home.is_empty() { 32 | xdg_data_home = xdg_data_home_default; 33 | } 34 | 35 | let puzzlefs_data_path = format!("{xdg_data_home}/puzzlefs"); 36 | fs::create_dir_all(&puzzlefs_data_path)?; 37 | 38 | // skopeo copy docker://ubuntu:latest oci:$HOME/.local/share/puzzlefs/ubuntu:latest 39 | let output = Command::new("skopeo") 40 | .args([ 41 | "copy", 42 | &format!("docker://{image}:{tag}"), 43 | &format!("oci:{puzzlefs_data_path}/{image}:{tag}"), 44 | ]) 45 | .output()?; 46 | if !output.status.success() { 47 | bail!( 48 | "skopeo exited with error:\n{}", 49 | str::from_utf8(&output.stderr)?, 50 | ); 51 | } 52 | 53 | if !output.stdout.is_empty() { 54 | println!( 55 | "skopeo output\n{}", 56 | str::from_utf8(&output.stdout).expect("Script output should not contain non-UTF8") 57 | ); 58 | } 59 | 60 | // umoci unpack --rootless --image ubuntu:latest /tmp/.tmpxyz/ubuntu 61 | let output = Command::new("umoci") 62 | .args([ 63 | OsStr::new("unpack"), 64 | OsStr::new("--rootless"), 65 | OsStr::new("--image"), 66 | OsStr::new(&format!("{puzzlefs_data_path}/{image}:{tag}")), 67 | to_dir.as_ref().as_os_str(), 68 | ]) 69 | .output()?; 70 | if !output.status.success() { 71 | bail!( 72 | "umoci exited with error:\n{}", 73 | str::from_utf8(&output.stderr)?, 74 | ); 75 | } 76 | 77 | if !output.stdout.is_empty() { 78 | println!( 79 | "umoci output\n{}", 
80 | str::from_utf8(&output.stdout).expect("Script output should not contain non-UTF8") 81 | ); 82 | } 83 | 84 | Ok(to_dir.as_ref().join("rootfs")) 85 | } 86 | 87 | pub fn puzzlefs(args: I) -> anyhow::Result 88 | where 89 | I: IntoIterator, 90 | S: AsRef, 91 | { 92 | let mut cmd = Command::cargo_bin("puzzlefs").unwrap(); 93 | let output = cmd.args(args).output()?; 94 | if !output.status.success() { 95 | bail!( 96 | "puzzlefs exited with error:\n{}", 97 | str::from_utf8(&output.stderr)?, 98 | ); 99 | } 100 | let output = str::from_utf8(&output.stdout).expect("Script output should not contain non-UTF8"); 101 | Ok(output.to_string()) 102 | } 103 | -------------------------------------------------------------------------------- /exe/tests/setup_verity_device.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | BACKING_FILE=$(mktemp -u) 5 | touch $BACKING_FILE 6 | dd if=/dev/zero of="$BACKING_FILE" bs=1k count=1024 7 | BLOCKDEV=$(sudo losetup -f --show "$BACKING_FILE") 8 | sudo mkfs -t ext4 -F -b4096 -O verity "$BLOCKDEV" 9 | MOUNTPOINT=$(mktemp -u) 10 | mkdir "$MOUNTPOINT" 11 | sudo mount "$BLOCKDEV" "$MOUNTPOINT" 12 | USER_ID=$(id -u) 13 | GROUP_ID=$(id -g) 14 | sudo chown -R "$USER_ID":"$GROUP_ID" "$MOUNTPOINT" 15 | echo "mounted $BLOCKDEV backed by $BACKING_FILE at $MOUNTPOINT" 16 | -------------------------------------------------------------------------------- /exe/tests/verity.rs: -------------------------------------------------------------------------------- 1 | mod verity_setup; 2 | use std::fs; 3 | use std::io::ErrorKind; 4 | use std::path::Path; 5 | use std::path::PathBuf; 6 | use std::process::Command; 7 | use verity_setup::VeritySetup; 8 | pub mod helpers; 9 | use helpers::puzzlefs; 10 | use std::ffi::OsStr; 11 | use std::fs::OpenOptions; 12 | use walkdir::WalkDir; 13 | 14 | const RANDOM_DIGEST: &str = "99a3d81481ed522712e5a8208024984778ec302971129e3f28b646a354fd27d0"; 15 | 16 | fn fuser_umount(puzzlefs_mountpoint: PathBuf) -> anyhow::Result<()> { 17 | // try fusermount3 18 | let mut cmd = Command::new("fusermount3"); 19 | let status = cmd 20 | .args([ 21 | "-u", 22 | &puzzlefs_mountpoint 23 | .clone() 24 | .into_os_string() 25 | .into_string() 26 | .unwrap(), 27 | ]) 28 | .status(); 29 | 30 | match status { 31 | Err(e) => { 32 | // figure out how to write if not let 33 | if let ErrorKind::NotFound = e.kind() { 34 | } else { 35 | return Err(e.into()); 36 | } 37 | } 38 | Ok(res) => { 39 | assert!(res.success()); 40 | return Ok(()); 41 | } 42 | } 43 | 44 | // try fusermount 45 | let mut cmd = Command::new("fusermount"); 46 | let status = cmd 47 | .args([ 48 | "-u", 49 | &puzzlefs_mountpoint.into_os_string().into_string().unwrap(), 50 | ]) 51 | .status()?; 52 | 53 | assert!(status.success()); 54 | Ok(()) 55 | } 56 | 57 | fn check_tamper(oci_path: &Path) -> anyhow::Result<()> { 58 | for file in WalkDir::new(oci_path.join("blobs").join("sha256")).into_iter() { 59 | let file = file?; 60 | if !file.metadata()?.is_file() { 61 | continue; 62 | } 63 | // we should get permission denied when trying to open blobs for writing 64 | let error = OpenOptions::new() 65 | .write(true) 66 | .open(file.path()) 67 | .unwrap_err(); 68 | if let ErrorKind::PermissionDenied = error.kind() { 69 | } else { 70 | return Err(error.into()); 71 | } 72 | } 73 | Ok(()) 74 | } 75 | 76 | #[test] 77 | fn test_fs_verity() -> anyhow::Result<()> { 78 | let v = VeritySetup::new()?; 79 | 80 | let mount_path = Path::new(&v.mountpoint); 81 | let rootfs = 
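        // the single-file test rootfs shipped with puzzlefs-lib (one ~107 KiB JPEG),
        // small enough that the resulting OCI image easily fits on the ext4 loop
        // device created by setup_verity_device.sh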
Path::new("../puzzlefs-lib/src/builder/test/test-1/"); 82 | 83 | let oci = mount_path.join("oci"); 84 | let mut oci_arg = oci.clone().into_os_string(); 85 | oci_arg.push(OsStr::new(":test")); 86 | let output = puzzlefs([OsStr::new("build"), rootfs.as_ref(), oci_arg.as_ref()])?; 87 | 88 | let tokens = output.split_whitespace().collect::>(); 89 | 90 | let digest = tokens 91 | .last() 92 | .expect("puzzlefs build should have returned the puzzlefs image manifest digest"); 93 | 94 | // 32 bytes in SHA256, each represented by 2 hex digits 95 | assert_eq!(digest.len(), 32 * 2); 96 | 97 | println!("digest: {digest}"); 98 | 99 | puzzlefs([ 100 | OsStr::new("enable-fs-verity"), 101 | oci_arg.as_ref(), 102 | OsStr::new(digest), 103 | ])?; 104 | 105 | check_tamper(&oci)?; 106 | 107 | let puzzlefs_mountpoint = mount_path.join("mount"); 108 | fs::create_dir_all(&puzzlefs_mountpoint)?; 109 | 110 | // test that we can't mount with the wrong digest 111 | let mount_output = puzzlefs([ 112 | OsStr::new("mount"), 113 | // foreground mode because background mode hangs on errors 114 | OsStr::new("-f"), 115 | OsStr::new("-d"), 116 | OsStr::new(RANDOM_DIGEST), 117 | oci_arg.as_ref(), 118 | OsStr::new(&puzzlefs_mountpoint), 119 | ]); 120 | 121 | assert!(mount_output 122 | .unwrap_err() 123 | .to_string() 124 | .contains("invalid fs_verity data: fsverity mismatch")); 125 | 126 | // test that we can mount with the right digest 127 | puzzlefs([ 128 | OsStr::new("mount"), 129 | OsStr::new("-d"), 130 | OsStr::new(digest), 131 | oci_arg.as_ref(), 132 | OsStr::new(&puzzlefs_mountpoint), 133 | ])?; 134 | 135 | fuser_umount(puzzlefs_mountpoint)?; 136 | 137 | Ok(()) 138 | } 139 | -------------------------------------------------------------------------------- /exe/tests/verity_setup/mod.rs: -------------------------------------------------------------------------------- 1 | use anyhow::bail; 2 | use std::process::Command; 3 | use std::str; 4 | 5 | #[derive(Debug)] 6 | pub struct VeritySetup { 7 | pub mountpoint: String, 8 | lo_device: String, 9 | backing_file: String, 10 | } 11 | 12 | impl VeritySetup { 13 | pub fn new() -> anyhow::Result { 14 | let output = Command::new("tests/setup_verity_device.sh").output()?; 15 | 16 | if !output.status.success() { 17 | bail!("tests/setup_fs_verity_device.sh failed!"); 18 | } 19 | 20 | let output = 21 | str::from_utf8(&output.stdout).expect("Script output should not contain non-UTF8"); 22 | let tokens; 23 | 24 | println!("output: {output}"); 25 | 26 | for line in output.lines() { 27 | if line.starts_with("mounted ") { 28 | tokens = line.split_whitespace().collect::>(); 29 | // the script outputs something like: 30 | // mounted /dev/loop1 backed by /tmp/tmp.ACBmpxbuul at /tmp/tmp.pU1MTG0K70 31 | let setup = VeritySetup { 32 | lo_device: String::from(tokens[1]), 33 | backing_file: String::from(tokens[4]), 34 | mountpoint: String::from(tokens[6]), 35 | }; 36 | return Ok(setup); 37 | } 38 | } 39 | bail!("Didn't find backing_file, lo_device and mountpoint in script output") 40 | } 41 | } 42 | 43 | impl Drop for VeritySetup { 44 | fn drop(&mut self) { 45 | let status = Command::new("tests/cleanup_verity_device.sh") 46 | .arg(&self.mountpoint) 47 | .arg(&self.lo_device) 48 | .arg(&self.backing_file) 49 | .status(); 50 | 51 | if let Err(e) = status { 52 | println!("Could not cleanup the verity setup {e}"); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /puzzlefs-lib/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "puzzlefs-lib" 3 | version = "0.2.0" 4 | authors = ["Tycho Andersen ", "Ariel Miculas "] 5 | description = """ 6 | Build, mount and extract PuzzleFS images. 7 | """ 8 | documentation = "http://docs.rs/puzzlefs-lib" 9 | homepage = "https://github.com/project-machine/puzzlefs" 10 | repository = "https://github.com/project-machine/puzzlefs" 11 | keywords = ["fuse", "filesystem", "container", "cdc"] 12 | categories = ["filesystem"] 13 | license = "Apache-2.0" 14 | edition = "2021" 15 | 16 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 17 | 18 | [build-dependencies] 19 | capnpc = "0.19" 20 | 21 | [dependencies] 22 | anyhow = "1.0.75" 23 | nix = { version = "0.27.1", features = ["user", "fs"] } 24 | xattr = "1.3.0" 25 | log = "0.4.17" 26 | zstd = "0.13.1" 27 | serde = { version = "1.0.27", features = [ "derive" ] } 28 | serde_json = "1.0.106" 29 | thiserror = "1.0.46" 30 | hex = "0.4.3" 31 | memmap2 = "0.9.4" 32 | capnp = "0.19" 33 | fs-verity = "0.2.0" 34 | sha2 = "0.10.8" 35 | walkdir = "2" 36 | # Fastcdc breaks semver and version 3.1 is not backwards compatible with 3.0 37 | fastcdc = "=3.0.0" 38 | fuser = {version = "0.14", default-features = false} 39 | os_pipe = "1.1.2" 40 | tempfile = "3.10" 41 | openat = "0.1.21" 42 | zstd-seekable = "0.1.23" 43 | ocidir = {git="https://github.com/containers/ocidir-rs"} 44 | cap-std = "3.2.0" 45 | 46 | 47 | [dev-dependencies] 48 | tempfile = "3.10" 49 | anyhow = "1.0.75" 50 | walkdir = "2" 51 | serde = "1.0.27" 52 | sha2 = "0.10.6" 53 | hex = "0.4.3" 54 | xattr = "1.3.0" 55 | -------------------------------------------------------------------------------- /puzzlefs-lib/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | ::capnpc::CompilerCommand::new() 3 | .src_prefix("src/format") 4 | .file("src/format/metadata.capnp") 5 | .run() 6 | .expect("compiling metadata schema"); 7 | } 8 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/builder.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{AVG_CHUNK_SIZE, MAX_CHUNK_SIZE, MIN_CHUNK_SIZE}; 2 | use crate::compression::{Compression, Noop, Zstd}; 3 | use crate::fsverity_helpers::{ 4 | check_fs_verity, fsverity_enable, InnerHashAlgorithm, FS_VERITY_BLOCK_SIZE_DEFAULT, 5 | }; 6 | use crate::oci::Digest; 7 | use std::any::Any; 8 | use std::backtrace::Backtrace; 9 | use std::cmp::min; 10 | use std::collections::{BTreeMap, HashMap}; 11 | use std::ffi::{OsStr, OsString}; 12 | use std::fs; 13 | use std::io; 14 | use std::os::fd::AsRawFd; 15 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; 16 | use std::os::unix::fs::MetadataExt; 17 | use std::path::Path; 18 | use std::sync::Arc; 19 | 20 | use walkdir::WalkDir; 21 | 22 | use crate::format::{ 23 | BlobRef, DirEnt, DirList, FileChunk, FileChunkList, Ino, Inode, InodeAdditional, InodeMode, 24 | Result, Rootfs, VerityData, WireFormatError, 25 | }; 26 | use crate::metadata_capnp; 27 | use crate::oci::media_types; 28 | use crate::oci::{Descriptor, Image}; 29 | use crate::reader::{PuzzleFS, PUZZLEFS_IMAGE_MANIFEST_VERSION}; 30 | use ocidir::oci_spec::image::{ImageManifest, Platform}; 31 | 32 | use nix::errno::Errno; 33 | 34 | use fastcdc::v2020::StreamCDC; 35 | mod filesystem; 36 | use filesystem::FilesystemStream; 37 | 38 | fn walker(rootfs: &Path) -> 
WalkDir { 39 | // breadth first search for sharing, don't cross filesystems just to be safe, order by file 40 | // name. we only return directories here, so we can more easily do delta generation to detect 41 | // what's missing in an existing puzzlefs. 42 | WalkDir::new(rootfs) 43 | .contents_first(false) 44 | .follow_links(false) 45 | .same_file_system(true) 46 | .sort_by(|a, b| a.file_name().cmp(b.file_name())) 47 | } 48 | 49 | // a struct to hold a directory's information before it can be rendered into a InodeSpecific::Dir 50 | // (aka the offset is unknown because we haven't accumulated all the inodes yet) 51 | struct Dir { 52 | ino: u64, 53 | dir_list: DirList, 54 | md: fs::Metadata, 55 | additional: Option, 56 | } 57 | 58 | impl Dir { 59 | fn add_entry(&mut self, name: OsString, ino: Ino) { 60 | self.dir_list.entries.push(DirEnt { 61 | name: OsString::into_vec(name), 62 | ino, 63 | }); 64 | } 65 | } 66 | 67 | // similar to the above, but holding file metadata 68 | struct File { 69 | ino: u64, 70 | chunk_list: FileChunkList, 71 | md: fs::Metadata, 72 | additional: Option, 73 | } 74 | 75 | struct Other { 76 | ino: u64, 77 | md: fs::Metadata, 78 | additional: Option, 79 | } 80 | 81 | fn serialize_metadata(rootfs: Rootfs) -> Result> { 82 | let mut message = ::capnp::message::Builder::new_default(); 83 | let mut capnp_rootfs = message.init_root::>(); 84 | 85 | rootfs.fill_capnp(&mut capnp_rootfs)?; 86 | 87 | let mut buf = Vec::new(); 88 | ::capnp::serialize::write_message(&mut buf, &message)?; 89 | Ok(buf) 90 | } 91 | 92 | fn process_chunks( 93 | oci: &Image, 94 | mut chunker: StreamCDC, 95 | files: &mut [File], 96 | verity_data: &mut VerityData, 97 | image_manifest: &mut ImageManifest, 98 | ) -> Result<()> { 99 | let mut file_iter = files.iter_mut(); 100 | let mut file_used = 0; 101 | let mut file = None; 102 | for f in file_iter.by_ref() { 103 | if f.md.size() > 0 { 104 | file = Some(f); 105 | break; 106 | } 107 | } 108 | 109 | 'outer: for result in &mut chunker { 110 | let chunk = result.unwrap(); 111 | let mut chunk_used: u64 = 0; 112 | 113 | let (desc, fs_verity_digest, compressed) = 114 | oci.put_blob::(&chunk.data, image_manifest, media_types::Chunk {})?; 115 | let digest = Digest::try_from(desc.digest().digest())?.underlying(); 116 | 117 | let verity_hash = fs_verity_digest; 118 | verity_data.insert(digest, verity_hash); 119 | 120 | while chunk_used < chunk.length as u64 { 121 | let room = min( 122 | file.as_ref().unwrap().md.len() - file_used, 123 | chunk.length as u64 - chunk_used, 124 | ); 125 | 126 | let blob = BlobRef { 127 | offset: chunk_used, 128 | digest, 129 | compressed, 130 | }; 131 | 132 | file.as_mut() 133 | .unwrap() 134 | .chunk_list 135 | .chunks 136 | .push(FileChunk { blob, len: room }); 137 | 138 | chunk_used += room; 139 | file_used += room; 140 | 141 | // get next file 142 | if file_used == file.as_ref().unwrap().md.len() { 143 | file_used = 0; 144 | file = None; 145 | 146 | for f in file_iter.by_ref() { 147 | if f.md.size() > 0 { 148 | file = Some(f); 149 | break; 150 | } 151 | } 152 | 153 | if file.is_none() { 154 | break 'outer; 155 | } 156 | } 157 | } 158 | } 159 | 160 | // If there are no files left we also expect there are no chunks left 161 | assert!(chunker.next().is_none()); 162 | 163 | Ok(()) 164 | } 165 | 166 | fn build_delta( 167 | rootfs: &Path, 168 | oci: &Image, 169 | mut existing: Option, 170 | verity_data: &mut VerityData, 171 | image_manifest: &mut ImageManifest, 172 | ) -> Result> { 173 | let mut dirs = HashMap::::new(); 174 | let mut 
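    // `files` collects regular files in traversal order; process_chunks() later walks
    // this same list in the same order, mapping the FastCDC chunk stream back onto
    // per-file extents.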
files = Vec::::new(); 175 | let mut others = Vec::::new(); 176 | let mut pfs_inodes = Vec::::new(); 177 | let mut fs_stream = FilesystemStream::new(); 178 | 179 | // host to puzzlefs inode mapping for hard link deteciton 180 | let mut host_to_pfs = HashMap::::new(); 181 | 182 | let mut next_ino: u64 = existing 183 | .as_mut() 184 | .map(|pfs| pfs.max_inode().map(|i| i + 1)) 185 | .unwrap_or_else(|| Ok(2))?; 186 | 187 | fn lookup_existing(existing: &mut Option, p: &Path) -> Result> { 188 | existing 189 | .as_mut() 190 | .map(|pfs| pfs.lookup(p)) 191 | .transpose() 192 | .map(|o| o.flatten()) 193 | } 194 | 195 | let rootfs_dirs = walker(rootfs) 196 | .into_iter() 197 | .filter_entry(|de| de.metadata().map(|md| md.is_dir()).unwrap_or(true)); 198 | 199 | // we specially create the "/" InodeMode::Dir object, since we will not iterate over it as a 200 | // child of some other directory 201 | let root_metadata = fs::symlink_metadata(rootfs)?; 202 | let root_additional = InodeAdditional::new(rootfs, &root_metadata)?; 203 | dirs.insert( 204 | root_metadata.ino(), 205 | Dir { 206 | ino: 1, 207 | md: root_metadata, 208 | dir_list: DirList { 209 | entries: Vec::::new(), 210 | look_below: false, 211 | }, 212 | additional: root_additional, 213 | }, 214 | ); 215 | 216 | let rootfs_relative = |p: &Path| { 217 | // .unwrap() here because we assume no programmer errors in this function (i.e. it is a 218 | // puzzlefs bug here) 219 | Path::new("/").join(p.strip_prefix(rootfs).unwrap()) 220 | }; 221 | 222 | for dir in rootfs_dirs { 223 | let d = dir.map_err(io::Error::from)?; 224 | let dir_path = rootfs_relative(d.path()); 225 | let existing_dirents: Vec<_> = lookup_existing(&mut existing, &dir_path)? 226 | .and_then(|ex| -> Option> { 227 | if let InodeMode::Dir { dir_list } = ex.mode { 228 | Some(dir_list.entries) 229 | } else { 230 | None 231 | } 232 | }) 233 | .unwrap_or_default(); 234 | 235 | let mut new_dirents = fs::read_dir(d.path())?.collect::>>()?; 236 | // sort the entries so we have reproducible puzzlefs images 237 | new_dirents.sort_by_key(|a| a.file_name()); 238 | 239 | // add whiteout information 240 | let this_metadata = fs::symlink_metadata(d.path())?; 241 | let this_dir = dirs 242 | .get_mut(&this_metadata.ino()) 243 | .ok_or_else(|| WireFormatError::from_errno(Errno::ENOENT))?; 244 | for dir_ent in existing_dirents { 245 | if !(new_dirents).iter().any(|new| { 246 | new.path().file_name().unwrap_or_else(|| OsStr::new("")) 247 | == OsStr::from_bytes(&dir_ent.name) 248 | }) { 249 | pfs_inodes.push(Inode::new_whiteout(dir_ent.ino)); 250 | this_dir.add_entry(OsString::from_vec(dir_ent.name), dir_ent.ino); 251 | } 252 | } 253 | 254 | for e in new_dirents { 255 | let md = e.metadata()?; 256 | 257 | let existing_inode = existing 258 | .as_mut() 259 | .map(|pfs| { 260 | let puzzlefs_path = rootfs_relative(&e.path()); 261 | pfs.lookup(&puzzlefs_path) 262 | }) 263 | .transpose()? 264 | .flatten(); 265 | 266 | let cur_ino = existing_inode.map(|ex| ex.ino).unwrap_or_else(|| { 267 | let next = next_ino; 268 | next_ino += 1; 269 | next 270 | }); 271 | 272 | // now that we know the ino of this thing, let's put it in the parent directory (assuming 273 | // this is not "/" for our image, aka inode #1) 274 | if cur_ino != 1 { 275 | // is this a hard link? if so, just use the existing ino we have rendered. 
otherewise, 276 | // use a new one 277 | let the_ino = host_to_pfs.get(&md.ino()).copied().unwrap_or(cur_ino); 278 | let parent_path = e.path().parent().map(|p| p.to_path_buf()).ok_or_else(|| { 279 | io::Error::new( 280 | io::ErrorKind::Other, 281 | format!("no parent for {}", e.path().display()), 282 | ) 283 | })?; 284 | let parent = dirs 285 | .get_mut(&fs::symlink_metadata(parent_path)?.ino()) 286 | .ok_or_else(|| { 287 | io::Error::new( 288 | io::ErrorKind::Other, 289 | format!("no pfs inode for {}", e.path().display()), 290 | ) 291 | })?; 292 | parent.add_entry( 293 | e.path() 294 | .file_name() 295 | .unwrap_or_else(|| OsStr::new("")) 296 | .to_os_string(), 297 | the_ino, 298 | ); 299 | 300 | // if it was a hard link, we don't need to actually render it again 301 | if host_to_pfs.contains_key(&md.ino()) { 302 | continue; 303 | } 304 | } 305 | 306 | host_to_pfs.insert(md.ino(), cur_ino); 307 | 308 | // render as much of the inode as we can 309 | // TODO: here are a bunch of optimizations we should do: no need to re-render things 310 | // that are the same (whole inodes, metadata, etc.). For now we just re-render the 311 | // whole metadata tree. 312 | let additional = InodeAdditional::new(&e.path(), &md)?; 313 | 314 | if md.is_dir() { 315 | dirs.insert( 316 | md.ino(), 317 | Dir { 318 | ino: cur_ino, 319 | md, 320 | dir_list: DirList { 321 | entries: Vec::::new(), 322 | look_below: false, 323 | }, 324 | additional, 325 | }, 326 | ); 327 | } else if md.is_file() { 328 | fs_stream.push(&e.path()); 329 | 330 | let file = File { 331 | ino: cur_ino, 332 | md, 333 | chunk_list: FileChunkList { 334 | chunks: Vec::::new(), 335 | }, 336 | additional, 337 | }; 338 | 339 | files.push(file); 340 | } else { 341 | let o = Other { 342 | ino: cur_ino, 343 | md, 344 | additional, 345 | }; 346 | others.push(o); 347 | } 348 | } 349 | } 350 | 351 | let fcdc = StreamCDC::new( 352 | Box::new(fs_stream), 353 | MIN_CHUNK_SIZE, 354 | AVG_CHUNK_SIZE, 355 | MAX_CHUNK_SIZE, 356 | ); 357 | process_chunks::(oci, fcdc, &mut files, verity_data, image_manifest)?; 358 | 359 | // TODO: not render this whole thing in memory, stick it all in the same blob, etc. 360 | let mut sorted_dirs = dirs.into_values().collect::>(); 361 | 362 | // render dirs 363 | pfs_inodes.extend( 364 | sorted_dirs 365 | .drain(..) 366 | .map(|d| Ok(Inode::new_dir(d.ino, &d.md, d.dir_list, d.additional)?)) 367 | .collect::>>()?, 368 | ); 369 | 370 | // render files 371 | pfs_inodes.extend( 372 | files 373 | .drain(..) 374 | .map(|f| { 375 | Ok(Inode::new_file( 376 | f.ino, 377 | &f.md, 378 | f.chunk_list.chunks, 379 | f.additional, 380 | )?) 381 | }) 382 | .collect::>>()?, 383 | ); 384 | 385 | pfs_inodes.extend( 386 | others 387 | .drain(..) 
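            // "other" inodes (symlinks, devices, fifos, sockets) carry no chunk data;
            // only their metadata and any InodeAdditional (symlink target, xattrs) is
            // recorded.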
388 | .map(|o| Ok(Inode::new_other(o.ino, &o.md, o.additional)?)) 389 | .collect::>>()?, 390 | ); 391 | 392 | pfs_inodes.sort_by(|a, b| a.ino.cmp(&b.ino)); 393 | 394 | Ok(pfs_inodes) 395 | } 396 | 397 | pub fn build_initial_rootfs( 398 | rootfs: &Path, 399 | oci: &Image, 400 | tag: &str, 401 | ) -> Result { 402 | let mut verity_data: VerityData = BTreeMap::new(); 403 | let mut image_manifest = oci.get_empty_manifest()?; 404 | let inodes = build_delta::(rootfs, oci, None, &mut verity_data, &mut image_manifest)?; 405 | 406 | let rootfs_buf = serialize_metadata(Rootfs { 407 | metadatas: vec![inodes], 408 | fs_verity_data: verity_data, 409 | manifest_version: PUZZLEFS_IMAGE_MANIFEST_VERSION, 410 | })?; 411 | 412 | let rootfs_descriptor = oci 413 | .put_blob::( 414 | rootfs_buf.as_slice(), 415 | &mut image_manifest, 416 | media_types::Rootfs {}, 417 | )? 418 | .0; 419 | oci.0 420 | .insert_manifest(image_manifest, Some(tag), Platform::default())?; 421 | 422 | Ok(rootfs_descriptor) 423 | } 424 | 425 | // add_rootfs_delta adds whatever the delta between the current rootfs and the puzzlefs 426 | // representation from the tag is. 427 | pub fn add_rootfs_delta( 428 | rootfs_path: &Path, 429 | oci: Image, 430 | tag: &str, 431 | base_layer: &str, 432 | ) -> Result<(Descriptor, Arc)> { 433 | let mut verity_data: VerityData = BTreeMap::new(); 434 | let mut image_manifest = oci.get_empty_manifest()?; 435 | 436 | let pfs = PuzzleFS::open(oci, base_layer, None)?; 437 | let oci = Arc::clone(&pfs.oci); 438 | let mut rootfs = Rootfs::try_from(oci.open_rootfs_blob(base_layer, None)?)?; 439 | 440 | let inodes = build_delta::( 441 | rootfs_path, 442 | &oci, 443 | Some(pfs), 444 | &mut verity_data, 445 | &mut image_manifest, 446 | )?; 447 | 448 | if !rootfs.metadatas.iter().any(|x| *x == inodes) { 449 | rootfs.metadatas.insert(0, inodes); 450 | } 451 | 452 | rootfs.fs_verity_data.extend(verity_data); 453 | let rootfs_buf = serialize_metadata(rootfs)?; 454 | let rootfs_descriptor = oci 455 | .put_blob::( 456 | rootfs_buf.as_slice(), 457 | &mut image_manifest, 458 | media_types::Rootfs {}, 459 | )? 
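                // put_blob() returns (descriptor, fs-verity digest, compressed flag);
                // only the OCI descriptor is needed here, hence the .0 below.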
460 | .0; 461 | oci.0 462 | .insert_manifest(image_manifest, Some(tag), Platform::default())?; 463 | Ok((rootfs_descriptor, oci)) 464 | } 465 | 466 | fn enable_verity_for_file(file: &cap_std::fs::File) -> Result<()> { 467 | if let Err(e) = fsverity_enable( 468 | file.as_raw_fd(), 469 | FS_VERITY_BLOCK_SIZE_DEFAULT, 470 | InnerHashAlgorithm::Sha256, 471 | &[], 472 | ) { 473 | // if fsverity is enabled, ignore the error 474 | if e.kind() != std::io::ErrorKind::AlreadyExists { 475 | return Err(WireFormatError::from(e)); 476 | } 477 | } 478 | Ok(()) 479 | } 480 | 481 | fn enable_and_check_verity_for_file(file: &cap_std::fs::File, expected: &[u8]) -> Result<()> { 482 | enable_verity_for_file(file)?; 483 | check_fs_verity(file, expected) 484 | } 485 | 486 | pub fn enable_fs_verity(oci: Image, tag: &str, manifest_root_hash: &str) -> Result<()> { 487 | // first enable fs verity for the puzzlefs image manifest 488 | let manifest_fd = oci.get_image_manifest_fd(tag)?; 489 | enable_and_check_verity_for_file(&manifest_fd, &hex::decode(manifest_root_hash)?[..])?; 490 | 491 | let pfs = PuzzleFS::open(oci, tag, None)?; 492 | let oci = Arc::clone(&pfs.oci); 493 | let rootfs = oci.open_rootfs_blob(tag, None)?; 494 | 495 | let rootfs_fd = oci.get_pfs_rootfs(tag, None)?; 496 | let rootfs_verity = oci.get_pfs_rootfs_verity(tag)?; 497 | 498 | enable_and_check_verity_for_file(&rootfs_fd, &rootfs_verity[..])?; 499 | 500 | let manifest = oci 501 | .0 502 | .find_manifest_with_tag(tag)? 503 | .ok_or_else(|| WireFormatError::MissingManifest(tag.to_string(), Backtrace::capture()))?; 504 | let config_digest = manifest.config().digest().digest(); 505 | let config_digest_path = Image::blob_path().join(config_digest); 506 | enable_verity_for_file(&oci.0.dir().open(config_digest_path)?)?; 507 | 508 | for (content_addressed_file, verity_hash) in rootfs.get_verity_data()? { 509 | let file_path = Image::blob_path().join(Digest::new(&content_addressed_file).to_string()); 510 | let fd = oci.0.dir().open(&file_path)?; 511 | if let Err(e) = fsverity_enable( 512 | fd.as_raw_fd(), 513 | FS_VERITY_BLOCK_SIZE_DEFAULT, 514 | InnerHashAlgorithm::Sha256, 515 | &[], 516 | ) { 517 | // if fsverity is enabled, ignore the error 518 | if e.kind() != std::io::ErrorKind::AlreadyExists { 519 | return Err(WireFormatError::from(e)); 520 | } 521 | } 522 | check_fs_verity(&fd, &verity_hash)?; 523 | } 524 | 525 | Ok(()) 526 | } 527 | 528 | // TODO: figure out how to guard this with #[cfg(test)] 529 | pub fn build_test_fs(path: &Path, image: &Image, tag: &str) -> Result { 530 | build_initial_rootfs::(path, image, tag) 531 | } 532 | 533 | #[cfg(test)] 534 | pub mod tests { 535 | use super::*; 536 | 537 | use tempfile::tempdir; 538 | 539 | use crate::reader::WalkPuzzleFS; 540 | use cap_std::fs::MetadataExt; 541 | use std::path::PathBuf; 542 | use tempfile::TempDir; 543 | 544 | type DefaultCompression = Zstd; 545 | 546 | #[test] 547 | fn test_fs_generation() -> anyhow::Result<()> { 548 | // TODO: verify the hash value here since it's only one thing? problem is as we change the 549 | // encoding/add stuff to it, the hash will keep changing and we'll have to update the 550 | // test... 551 | // 552 | // but once all that's stabalized, we should verify the metadata hash too. 
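        // The assertions below rely on the build being uncompressed: the single test file
        // fits in one CDC chunk, so the blob's name is simply the SHA256 of the file
        // contents, which is what FILE_DIGEST pins down.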
553 | let dir = tempdir().unwrap(); 554 | let image = Image::new(dir.path()).unwrap(); 555 | build_test_fs(Path::new("src/builder/test/test-1"), &image, "test-tag").unwrap(); 556 | let rootfs = image.open_rootfs_blob("test-tag", None).unwrap(); 557 | 558 | // there should be a blob that matches the hash of the test data, since it all gets input 559 | // as one chunk and there's only one file 560 | const FILE_DIGEST: &str = 561 | "3eee1082ab3babf6c1595f1069d11ebc2a60135890a11e402e017ddd831a220d"; 562 | 563 | let md = image 564 | .0 565 | .dir() 566 | .symlink_metadata(Image::blob_path().join(FILE_DIGEST)) 567 | .unwrap(); 568 | assert!(md.is_file()); 569 | 570 | let mut decompressor = image 571 | .open_compressed_blob::( 572 | &Digest::try_from(FILE_DIGEST).unwrap(), 573 | None, 574 | ) 575 | .unwrap(); 576 | 577 | let mut inodes = Vec::new(); 578 | 579 | // we can at least deserialize inodes and they look sane 580 | for i in 0..2 { 581 | inodes.push(rootfs.find_inode(i + 1)?); 582 | } 583 | 584 | assert_eq!(inodes[0].ino, 1); 585 | if let InodeMode::Dir { ref dir_list } = inodes[0].mode { 586 | assert_eq!(dir_list.entries.len(), 1); 587 | assert_eq!(dir_list.entries[0].ino, 2); 588 | assert_eq!(dir_list.entries[0].name, b"SekienAkashita.jpg"); 589 | } else { 590 | panic!("bad inode mode: {:?}", inodes[0].mode); 591 | } 592 | assert_eq!(inodes[0].uid, md.uid()); 593 | assert_eq!(inodes[0].gid, md.gid()); 594 | 595 | assert_eq!(inodes[1].ino, 2); 596 | assert_eq!(inodes[1].uid, md.uid()); 597 | assert_eq!(inodes[1].gid, md.gid()); 598 | if let InodeMode::File { ref chunks } = inodes[1].mode { 599 | assert_eq!(chunks.len(), 1); 600 | assert_eq!( 601 | chunks[0].len, 602 | decompressor.get_uncompressed_length().unwrap() 603 | ); 604 | } else { 605 | panic!("bad inode mode: {:?}", inodes[1].mode); 606 | }; 607 | image.0.fsck()?; 608 | Ok::<(), anyhow::Error>(()) 609 | } 610 | 611 | #[test] 612 | fn test_delta_generation() -> anyhow::Result<()> { 613 | let dir = tempdir().unwrap(); 614 | let image = Image::new(dir.path()).unwrap(); 615 | let tag = "test"; 616 | build_test_fs(Path::new("src/builder/test/test-1"), &image, tag).unwrap(); 617 | 618 | let delta_dir = dir.path().join(Path::new("delta")); 619 | fs::create_dir_all(delta_dir.join(Path::new("foo"))).unwrap(); 620 | fs::copy( 621 | Path::new("src/builder/test/test-1/SekienAkashita.jpg"), 622 | delta_dir.join("SekienAkashita.jpg"), 623 | ) 624 | .unwrap(); 625 | image.0.fsck()?; 626 | 627 | let new_tag = "test2"; 628 | let (_desc, image) = 629 | add_rootfs_delta::(&delta_dir, image, new_tag, tag).unwrap(); 630 | let delta = Rootfs::try_from(image.open_rootfs_blob(new_tag, None).unwrap()).unwrap(); 631 | assert_eq!(delta.metadatas.len(), 2); 632 | 633 | let image = Image::new(dir.path()).unwrap(); 634 | image.0.fsck()?; 635 | let mut pfs = PuzzleFS::open(image, new_tag, None).unwrap(); 636 | assert_eq!(pfs.max_inode().unwrap(), 3); 637 | let mut walker = WalkPuzzleFS::walk(&mut pfs).unwrap(); 638 | 639 | let root = walker.next().unwrap().unwrap(); 640 | assert_eq!(root.path.to_string_lossy(), "/"); 641 | assert_eq!(root.inode.ino, 1); 642 | assert_eq!(root.inode.dir_entries().unwrap().len(), 2); 643 | 644 | let jpg_file = walker.next().unwrap().unwrap(); 645 | assert_eq!(jpg_file.path.to_string_lossy(), "/SekienAkashita.jpg"); 646 | assert_eq!(jpg_file.inode.ino, 2); 647 | assert_eq!(jpg_file.inode.file_len().unwrap(), 109466); 648 | 649 | let foo_dir = walker.next().unwrap().unwrap(); 650 | assert_eq!(foo_dir.path.to_string_lossy(), 
"/foo"); 651 | assert_eq!(foo_dir.inode.ino, 3); 652 | assert_eq!(foo_dir.inode.dir_entries().unwrap().len(), 0); 653 | 654 | assert!(walker.next().is_none()); 655 | Ok(()) 656 | } 657 | 658 | fn do_vecs_match(a: &[T], b: &[T]) -> bool { 659 | if a.len() != b.len() { 660 | return false; 661 | } 662 | 663 | let matching = a.iter().zip(b.iter()).filter(|&(a, b)| a == b).count(); 664 | matching == a.len() 665 | } 666 | 667 | fn get_image_blobs() -> Vec { 668 | WalkDir::new(Image::blob_path()) 669 | .contents_first(false) 670 | .follow_links(false) 671 | .same_file_system(true) 672 | .sort_by(|a, b| a.file_name().cmp(b.file_name())) 673 | .into_iter() 674 | .skip(1) 675 | .map(|x| OsString::from(x.unwrap().path().file_stem().unwrap())) 676 | .collect::>() 677 | } 678 | 679 | // given the same directory, test whether building it multiple times results in the same puzzlefs image 680 | fn same_dir_reproducible(path: &Path) -> bool { 681 | let dirs: [_; 10] = std::array::from_fn(|_| tempdir().unwrap()); 682 | let mut sha_suite = Vec::new(); 683 | let images = dirs 684 | .iter() 685 | .map(|dir| Image::new(dir.path()).unwrap()) 686 | .collect::>(); 687 | 688 | for (i, image) in images.iter().enumerate() { 689 | build_test_fs(path, image, "test").unwrap(); 690 | let ents = get_image_blobs(); 691 | sha_suite.push(ents); 692 | 693 | if i != 0 && !do_vecs_match(&sha_suite[i - 1], &sha_suite[i]) { 694 | println!("not matching at iteration: {i}"); 695 | return false; 696 | } 697 | } 698 | 699 | true 700 | } 701 | 702 | // given the same directory contents, test whether building them from multiple paths results in the same puzzlefs image 703 | fn same_dir_contents_reproducible(path: &[PathBuf]) -> bool { 704 | let dirs = path.iter().map(|_| tempdir().unwrap()).collect::>(); 705 | let mut sha_suite = Vec::new(); 706 | let images = dirs 707 | .iter() 708 | .map(|dir| Image::new(dir.path()).unwrap()) 709 | .collect::>(); 710 | 711 | for (i, image) in images.iter().enumerate() { 712 | build_test_fs(&path[i], image, "test").unwrap(); 713 | let ents = get_image_blobs(); 714 | sha_suite.push(ents); 715 | 716 | if i != 0 && !do_vecs_match(&sha_suite[i - 1], &sha_suite[i]) { 717 | println!("not matching at iteration: {i}"); 718 | return false; 719 | } 720 | } 721 | 722 | true 723 | } 724 | 725 | #[test] 726 | fn test_reproducibility() { 727 | fn build_dummy_fs(dir: &Path) -> PathBuf { 728 | let rootfs = dir.join("rootfs"); 729 | let subdirs = ["foo", "bar", "baz"]; 730 | let files = ["foo_file", "bar_file", "baz_file"]; 731 | 732 | for subdir in subdirs { 733 | let path = rootfs.join(subdir); 734 | fs::create_dir_all(path).unwrap(); 735 | } 736 | 737 | for file in files { 738 | let path = rootfs.join(file); 739 | fs::write(path, b"some file contents").unwrap(); 740 | } 741 | 742 | rootfs 743 | } 744 | 745 | let dir = tempdir().unwrap(); 746 | let rootfs = build_dummy_fs(dir.path()); 747 | 748 | assert!( 749 | same_dir_reproducible(&rootfs), 750 | "build not reproducible for {}", 751 | rootfs.display() 752 | ); 753 | 754 | let dirs: [_; 10] = std::array::from_fn(|i| match i % 2 == 0 { 755 | // if /tmp and the current dir reside on different filesystems there are better chances 756 | // for read_dir (which uses readdir under the hood) to yield a different order of the files 757 | true => tempdir().unwrap(), 758 | false => TempDir::new_in(".").unwrap(), 759 | }); 760 | let rootfses = dirs 761 | .iter() 762 | .map(|dir| build_dummy_fs(dir.path())) 763 | .collect::>(); 764 | 765 | assert!( 766 | 
same_dir_contents_reproducible(&rootfses), 767 | "build not reproducible" 768 | ); 769 | } 770 | } 771 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/builder/filesystem.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::io::Read; 3 | use std::path::{Path, PathBuf}; 4 | 5 | struct ReaderLink { 6 | file: PathBuf, 7 | done: bool, 8 | } 9 | 10 | /// A structure used to chain multiple readers, similar to 11 | /// [chain](https://doc.rust-lang.org/std/io/trait.Read.html#method.chain) 12 | /// and [multi_reader](https://docs.rs/multi_reader/latest/multi_reader/) 13 | pub struct FilesystemStream { 14 | reader_chain: Vec, 15 | current_reader: Option, 16 | } 17 | 18 | impl FilesystemStream { 19 | pub fn new() -> Self { 20 | FilesystemStream { 21 | reader_chain: Vec::new(), 22 | current_reader: None, 23 | } 24 | } 25 | 26 | pub fn push(&mut self, file: &Path) { 27 | self.reader_chain.push(ReaderLink { 28 | file: file.into(), 29 | done: false, 30 | }) 31 | } 32 | } 33 | 34 | impl Read for FilesystemStream { 35 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 36 | for link in &mut self.reader_chain { 37 | if link.done { 38 | continue; 39 | } 40 | 41 | let current_reader = match self.current_reader.as_mut() { 42 | Some(reader) => reader, 43 | None => self.current_reader.insert(std::fs::File::open(&link.file)?), 44 | }; 45 | 46 | match current_reader.read(buf)? { 47 | 0 if !buf.is_empty() => { 48 | self.current_reader = None; 49 | link.done = true 50 | } 51 | n => return Ok(n), 52 | } 53 | } 54 | Ok(0) 55 | } 56 | } 57 | 58 | #[cfg(test)] 59 | pub mod tests { 60 | use super::*; 61 | 62 | use std::fs::File; 63 | use std::io::Write; 64 | use tempfile::tempdir; 65 | 66 | #[test] 67 | fn test_fs_stream() -> anyhow::Result<()> { 68 | let dir = tempdir().unwrap(); 69 | let file_name1 = dir.path().join(Path::new("foo")); 70 | let mut file1 = File::create(&file_name1)?; 71 | let file_name2 = dir.path().join(Path::new("bar")); 72 | let mut file2 = File::create(&file_name2)?; 73 | let file_name3 = dir.path().join(Path::new("baz")); 74 | let mut file3 = File::create(&file_name3)?; 75 | let mut buffer = Vec::new(); 76 | 77 | file1.write_all(b"Lorem ipsum ")?; 78 | file2.write_all(b"dolor sit amet, ")?; 79 | file3.write_all(b"consectetur adipiscing elit.")?; 80 | 81 | let mut fs_stream = FilesystemStream::new(); 82 | fs_stream.push(&file_name1); 83 | fs_stream.push(&file_name2); 84 | fs_stream.push(&file_name3); 85 | 86 | fs_stream.read_to_end(&mut buffer)?; 87 | assert_eq!( 88 | buffer, 89 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit.".as_bytes() 90 | ); 91 | 92 | Ok(()) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/builder/test/test-1/SekienAkashita.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-machine/puzzlefs/40d03712c3e9dfef6acd202ab4ffeae5d1374e68/puzzlefs-lib/src/builder/test/test-1/SekienAkashita.jpg -------------------------------------------------------------------------------- /puzzlefs-lib/src/common.rs: -------------------------------------------------------------------------------- 1 | // Quoting from https://github.com/ronomon/deduplication 2 | // An average chunk size of 64 KB is recommended for optimal end-to-end deduplication and compression efficiency 3 | pub const MIN_CHUNK_SIZE: u32 = 16 * 1024; 4 | pub const 
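// These bounds are passed straight to fastcdc's StreamCDC in builder.rs; raising the
// average chunk size trades deduplication granularity for fewer, larger blobs.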
AVG_CHUNK_SIZE: u32 = 64 * 1024; 5 | pub const MAX_CHUNK_SIZE: u32 = 256 * 1024; 6 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/compression.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::io::Seek; 3 | 4 | mod noop; 5 | pub use noop::Noop; 6 | 7 | mod zstd_seekable_wrapper; 8 | pub use zstd_seekable_wrapper::*; 9 | 10 | pub trait Compressor: io::Write { 11 | // https://users.rust-lang.org/t/how-to-move-self-when-using-dyn-trait/50123 12 | fn end(self: Box) -> io::Result<()>; 13 | } 14 | 15 | pub trait Decompressor: io::Read + io::Seek { 16 | fn get_uncompressed_length(&mut self) -> io::Result; 17 | } 18 | 19 | pub trait Compression { 20 | fn compress<'a, W: std::io::Write + 'a>(dest: W) -> io::Result>; 21 | fn decompress<'a, R: std::io::Read + Seek + 'a>( 22 | source: R, 23 | ) -> io::Result>; 24 | fn append_extension(media_type: &str) -> String; 25 | } 26 | 27 | #[cfg(test)] 28 | mod tests { 29 | use super::*; 30 | use tempfile::NamedTempFile; 31 | 32 | pub const TRUTH: &str = "meshuggah rocks"; 33 | 34 | pub fn compress_decompress() -> anyhow::Result<()> { 35 | let f = NamedTempFile::new()?; 36 | let mut compressed = C::compress(f.reopen()?)?; 37 | compressed.write_all(TRUTH.as_bytes())?; 38 | compressed.end()?; 39 | 40 | let mut buf = vec![0_u8; TRUTH.len()]; 41 | let n = C::decompress(f.reopen()?)?.read(&mut buf)?; 42 | assert_eq!(n, TRUTH.len()); 43 | 44 | assert_eq!(TRUTH.as_bytes(), buf); 45 | Ok(()) 46 | } 47 | 48 | pub fn compression_is_seekable() -> anyhow::Result<()> { 49 | let f = NamedTempFile::new()?; 50 | let mut compressed = C::compress(f.reopen()?)?; 51 | compressed.write_all(TRUTH.as_bytes())?; 52 | compressed.end()?; 53 | 54 | let mut buf = vec![0_u8; 1024]; 55 | let mut decompressor = C::decompress(f.reopen()?)?; 56 | decompressor.seek(io::SeekFrom::Start("meshuggah ".len() as u64))?; 57 | let n = decompressor.read(&mut buf)?; 58 | assert_eq!(n, 5); 59 | 60 | assert_eq!("rocks".as_bytes(), &buf[0..5]); 61 | Ok(()) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/compression/noop.rs: -------------------------------------------------------------------------------- 1 | use crate::compression::{Compression, Compressor, Decompressor}; 2 | use std::io; 3 | use std::io::{Read, Seek, Write}; 4 | 5 | pub struct Noop {} 6 | 7 | pub struct NoopCompressor { 8 | encoder: Box, 9 | } 10 | 11 | impl io::Write for NoopCompressor { 12 | fn write(&mut self, buf: &[u8]) -> io::Result { 13 | self.encoder.write(buf) 14 | } 15 | 16 | fn flush(&mut self) -> io::Result<()> { 17 | self.encoder.flush() 18 | } 19 | } 20 | 21 | impl Compressor for NoopCompressor { 22 | fn end(self: Box) -> io::Result<()> { 23 | Ok(()) 24 | } 25 | } 26 | 27 | pub struct NoopDecompressor { 28 | decoder: Box, 29 | } 30 | 31 | impl Seek for NoopDecompressor { 32 | fn seek(&mut self, offset: io::SeekFrom) -> io::Result { 33 | self.decoder.seek(offset) 34 | } 35 | } 36 | 37 | impl Read for NoopDecompressor { 38 | fn read(&mut self, out: &mut [u8]) -> io::Result { 39 | self.decoder.read(out) 40 | } 41 | } 42 | 43 | impl Decompressor for NoopDecompressor { 44 | fn get_uncompressed_length(&mut self) -> io::Result { 45 | self.decoder.stream_len() 46 | } 47 | } 48 | 49 | impl Compression for Noop { 50 | fn compress<'a, W: std::io::Write + 'a>(dest: W) -> io::Result> { 51 | Ok(Box::new(NoopCompressor { 52 | encoder: Box::new(dest), 53 | 
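            // Noop passes bytes through untouched, which is why end() has nothing to
            // finalize and append_extension() below leaves the media type unchanged.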
})) 54 | } 55 | 56 | fn decompress<'a, R: std::io::Read + Seek + 'a>( 57 | source: R, 58 | ) -> io::Result> { 59 | Ok(Box::new(NoopDecompressor { 60 | decoder: Box::new(source), 61 | })) 62 | } 63 | 64 | fn append_extension(media_type: &str) -> String { 65 | media_type.to_string() 66 | } 67 | } 68 | 69 | #[cfg(test)] 70 | mod tests { 71 | use super::*; 72 | use crate::compression::tests::{compress_decompress, compression_is_seekable, TRUTH}; 73 | use std::fs; 74 | use tempfile::NamedTempFile; 75 | 76 | #[test] 77 | fn test_noop_roundtrip() -> anyhow::Result<()> { 78 | compress_decompress::() 79 | } 80 | 81 | #[test] 82 | fn test_noop_seekable() -> anyhow::Result<()> { 83 | compression_is_seekable::() 84 | } 85 | 86 | #[test] 87 | fn test_noop_is_noop() -> anyhow::Result<()> { 88 | // shouldn't mangle the file content if in no-op mode 89 | let f = NamedTempFile::new()?; 90 | Noop::compress(f.reopen()?)?.write_all(TRUTH.as_bytes())?; 91 | 92 | let content = fs::read_to_string(f.path())?; 93 | assert_eq!(TRUTH, content); 94 | Ok(()) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/compression/zstd_seekable_wrapper.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::min; 2 | use std::io; 3 | use std::io::{Read, Seek, Write}; 4 | 5 | use zstd_seekable::{CStream, Seekable, SeekableCStream}; 6 | 7 | use crate::compression::{Compression, Compressor, Decompressor}; 8 | 9 | // We compress files in 4KB frames; it's not clear what the ideal size for this is, but each frame 10 | // is compressed independently so the bigger they are the more compression savings we get. However, 11 | // the bigger they are the more decompression we have to do to get to the data in the middle of a 12 | // frame if someone e.g. mmap()s something in the middle of a frame. 13 | // 14 | // Another consideration is the average chunk size from FastCDC: if we make this the same as the 15 | // chunk size, there's no real point in using seekable compression at all, at least for files. It's 16 | // also possible that we want different frame sizes for metadata blobs and file content. 17 | const FRAME_SIZE: usize = 4096; 18 | const COMPRESSION_LEVEL: usize = 3; 19 | 20 | fn err_to_io(e: E) -> io::Error { 21 | io::Error::new(io::ErrorKind::Other, e) 22 | } 23 | 24 | pub struct ZstdCompressor { 25 | f: W, 26 | stream: SeekableCStream, 27 | buf: Vec, 28 | } 29 | 30 | impl Compressor for ZstdCompressor { 31 | fn end(mut self: Box) -> io::Result<()> { 32 | // end_stream has to be called multiple times until 0 is returned, see 33 | // https://docs.rs/zstd-seekable/0.1.23/src/zstd_seekable/lib.rs.html#224-237 and 34 | // https://fossies.org/linux/zstd/contrib/seekable_format/zstd_seekable.h 35 | loop { 36 | let size = self.stream.end_stream(&mut self.buf).map_err(err_to_io)?; 37 | self.f.write_all(&self.buf[0..size])?; 38 | if size == 0 { 39 | break; 40 | } 41 | } 42 | Ok(()) 43 | } 44 | } 45 | 46 | impl Write for ZstdCompressor { 47 | fn write(&mut self, buf: &[u8]) -> io::Result { 48 | // TODO: we could try to consume all the input, but for now we just consume a single block 49 | let (out_pos, in_pos) = self 50 | .stream 51 | .compress(&mut self.buf, buf) 52 | .map_err(err_to_io)?; 53 | self.f.write_all(&self.buf[0..out_pos])?; 54 | Ok(in_pos) 55 | } 56 | 57 | fn flush(&mut self) -> io::Result<()> { 58 | // we could self.stream.flush(), but that adversely affects compression ratio... let's 59 | // cheat for now. 
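        // Consequence of cheating: data may still sit in the seekable stream's internal
        // buffer after flush(); callers must call end(), which drains end_stream in a
        // loop, before the output is complete.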
60 | Ok(()) 61 | } 62 | } 63 | 64 | pub struct ZstdDecompressor<'a, R: Read + Seek> { 65 | stream: Seekable<'a, R>, 66 | offset: u64, 67 | uncompressed_length: u64, 68 | } 69 | 70 | impl Decompressor for ZstdDecompressor<'_, R> { 71 | fn get_uncompressed_length(&mut self) -> io::Result { 72 | Ok(self.uncompressed_length) 73 | } 74 | } 75 | 76 | impl Seek for ZstdDecompressor<'_, R> { 77 | fn seek(&mut self, offset: io::SeekFrom) -> io::Result { 78 | match offset { 79 | io::SeekFrom::Start(s) => { 80 | self.offset = s; 81 | } 82 | io::SeekFrom::End(e) => { 83 | if e > 0 { 84 | return Err(io::Error::new(io::ErrorKind::Other, "zstd seek past end")); 85 | } 86 | self.offset = self.uncompressed_length - u64::try_from(-e).map_err(err_to_io)?; 87 | } 88 | io::SeekFrom::Current(c) => { 89 | if c > 0 { 90 | self.offset += u64::try_from(c).map_err(err_to_io)?; 91 | } else { 92 | self.offset -= u64::try_from(-c).map_err(err_to_io)?; 93 | } 94 | } 95 | } 96 | Ok(self.offset) 97 | } 98 | } 99 | 100 | impl Read for ZstdDecompressor<'_, R> { 101 | fn read(&mut self, out: &mut [u8]) -> io::Result { 102 | // decompress() gets angry (ZSTD("Corrupted block detected")) if you pass it a buffer 103 | // longer than the uncompressable data, so let's be careful to truncate the buffer if it 104 | // would make zstd angry. maybe soon they'll implement a real read() API :) 105 | let end = min(out.len(), (self.uncompressed_length - self.offset) as usize); 106 | let size = self 107 | .stream 108 | .decompress(&mut out[0..end], self.offset) 109 | .map_err(err_to_io)?; 110 | self.offset += size as u64; 111 | Ok(size) 112 | } 113 | } 114 | 115 | pub struct Zstd {} 116 | 117 | impl Compression for Zstd { 118 | fn compress<'a, W: Write + 'a>(dest: W) -> io::Result> { 119 | // a "pretty high" compression level, since decompression should be nearly the same no 120 | // matter what compression level. Maybe we should turn this to 22 or whatever the max is... 121 | let stream = SeekableCStream::new(COMPRESSION_LEVEL, FRAME_SIZE).map_err(err_to_io)?; 122 | Ok(Box::new(ZstdCompressor { 123 | f: dest, 124 | stream, 125 | buf: vec![0_u8; CStream::out_size()], 126 | })) 127 | } 128 | 129 | fn decompress<'a, R: Read + Seek + 'a>(source: R) -> io::Result> { 130 | let stream = Seekable::init(Box::new(source)).map_err(err_to_io)?; 131 | 132 | // zstd-seekable doesn't like it when we pass a buffer past the end of the uncompressed 133 | // stream, so let's figure out the size of the uncompressed file so we can implement 134 | // ::read() in a reasonable way. This also lets us implement SeekFrom::End. 
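        // The per-frame decompressed sizes come from the seek table the seekable format
        // appends to the stream, so this sum only reads metadata; no frame is decompressed.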
135 | let uncompressed_length = (0..stream.get_num_frames()) 136 | .map(|i| stream.get_frame_decompressed_size(i) as u64) 137 | .sum(); 138 | Ok(Box::new(ZstdDecompressor { 139 | stream, 140 | offset: 0, 141 | uncompressed_length, 142 | })) 143 | } 144 | 145 | fn append_extension(media_type: &str) -> String { 146 | format!("{media_type}+zstd") 147 | } 148 | } 149 | 150 | #[cfg(test)] 151 | mod tests { 152 | use super::*; 153 | use crate::compression::tests::{compress_decompress, compression_is_seekable}; 154 | 155 | #[test] 156 | fn test_ztsd_roundtrip() -> anyhow::Result<()> { 157 | compress_decompress::() 158 | } 159 | 160 | #[test] 161 | fn test_zstd_seekable() -> anyhow::Result<()> { 162 | compression_is_seekable::() 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/extractor.rs: -------------------------------------------------------------------------------- 1 | use crate::format::InodeMode; 2 | use crate::oci::Image; 3 | use crate::reader::{PuzzleFS, WalkPuzzleFS}; 4 | use log::info; 5 | use nix::sys::stat::{makedev, mknod, Mode, SFlag}; 6 | use nix::unistd::{chown, mkfifo, symlinkat, Gid, Uid}; 7 | use std::collections::HashMap; 8 | use std::ffi::OsStr; 9 | use std::fs::Permissions; 10 | use std::os::unix::ffi::OsStrExt; 11 | use std::os::unix::fs::PermissionsExt; 12 | use std::path::{Component, Path, PathBuf}; 13 | use std::{fs, io}; 14 | 15 | fn runs_privileged() -> bool { 16 | Uid::effective().is_root() 17 | } 18 | 19 | fn safe_path(dir: &Path, image_path: &Path) -> anyhow::Result { 20 | // need to be a bit careful here about paths in the case of malicious images so we don't write 21 | // things outside where we're supposed to. Bad cases are paths like "/../../.." or images 22 | // /usr/bin -> /bin and files in /usr/bin, we shouldn't write files anywhere outside the target 23 | // dir. 24 | 25 | let mut buf = PathBuf::new(); 26 | buf.push(dir); 27 | let mut level = 1; 28 | 29 | for component in image_path.components() { 30 | match component { 31 | Component::Prefix(..) => bail!("Path prefix not understood"), // "Does not occur on Unix." 32 | Component::RootDir => {} 33 | Component::CurDir => {} 34 | Component::Normal(c) => { 35 | buf.push(c); 36 | level += 1; 37 | 38 | // make sure this isn't a symlink 39 | match fs::symlink_metadata(&buf) { 40 | Ok(md) => { 41 | if md.file_type().is_symlink() { 42 | bail!("symlink prefixes are not allowed: {:#?}", buf) 43 | } 44 | } 45 | Err(e) => { 46 | if e.kind() != io::ErrorKind::NotFound { 47 | bail!("problem accessing path component {:#?}: {}", buf, e) 48 | } 49 | 50 | // we render each dir, so the first ENOENT should be the lowest path. could 51 | // maybe double check this if we really felt it was necessary... 
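                        // i.e. every component that exists is a real, non-symlink directory,
                        // and everything below this point will be created by the extractor
                        // itself, so the final path cannot escape `dir`.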
52 | return Ok(buf); 53 | } 54 | } 55 | } 56 | Component::ParentDir => { 57 | level -= 1; 58 | if level <= 0 { 59 | bail!("image path escapes extract dir: {:#?}", image_path) 60 | } 61 | buf.pop(); 62 | } 63 | } 64 | } 65 | 66 | Ok(buf) 67 | } 68 | 69 | pub fn extract_rootfs(oci_dir: &str, tag: &str, extract_dir: &str) -> anyhow::Result<()> { 70 | let oci_dir = Path::new(oci_dir); 71 | let image = Image::open(oci_dir)?; 72 | let dir = Path::new(extract_dir); 73 | fs::create_dir_all(dir)?; 74 | let mut pfs = PuzzleFS::open(image, tag, None)?; 75 | let mut walker = WalkPuzzleFS::walk(&mut pfs)?; 76 | let mut host_to_pfs = HashMap::::new(); 77 | 78 | walker.try_for_each(|de| -> anyhow::Result<()> { 79 | let dir_entry = de?; 80 | let path = safe_path(dir, &dir_entry.path)?; 81 | let mut is_symlink = false; 82 | info!("extracting {:#?}", path); 83 | if let Some(existing_path) = host_to_pfs.get(&dir_entry.inode.ino) { 84 | fs::hard_link(existing_path, &path)?; 85 | return Ok(()); 86 | } 87 | host_to_pfs.insert(dir_entry.inode.ino, path.clone()); 88 | 89 | match dir_entry.inode.mode { 90 | InodeMode::File { .. } => { 91 | let mut reader = dir_entry.open()?; 92 | let mut f = fs::File::create(&path)?; 93 | io::copy(&mut reader, &mut f)?; 94 | } 95 | InodeMode::Dir { .. } => fs::create_dir_all(&path)?, 96 | // TODO: fix all the hard coded modes when we have modes 97 | InodeMode::Fifo => { 98 | mkfifo(&path, Mode::S_IRWXU)?; 99 | } 100 | InodeMode::Chr { major, minor } => { 101 | mknod(&path, SFlag::S_IFCHR, Mode::S_IRWXU, makedev(major, minor))?; 102 | } 103 | InodeMode::Blk { major, minor } => { 104 | mknod(&path, SFlag::S_IFBLK, Mode::S_IRWXU, makedev(major, minor))?; 105 | } 106 | InodeMode::Lnk => { 107 | let target = dir_entry.inode.symlink_target()?; 108 | is_symlink = true; 109 | symlinkat(target, None, &path)?; 110 | } 111 | InodeMode::Sock => { 112 | todo!(); 113 | } 114 | InodeMode::Wht => { 115 | todo!(); 116 | } 117 | _ => { 118 | bail!("bad inode mode {:#?}", dir_entry.inode.mode) 119 | } 120 | } 121 | if let Some(x) = dir_entry.inode.additional { 122 | for x in &x.xattrs { 123 | xattr::set(&path, OsStr::from_bytes(&x.key), &x.val)?; 124 | } 125 | } 126 | 127 | // trying to change permissions for a symlink would follow the symlink and we might not have extracted the target yet 128 | // anyway, symlink permissions are not used in Linux (although they are used in macOS and FreeBSD) 129 | if !is_symlink { 130 | std::fs::set_permissions( 131 | &path, 132 | Permissions::from_mode(dir_entry.inode.permissions.into()), 133 | )?; 134 | } 135 | 136 | if runs_privileged() { 137 | chown( 138 | &path, 139 | Some(Uid::from_raw(dir_entry.inode.uid)), 140 | Some(Gid::from_raw(dir_entry.inode.gid)), 141 | )?; 142 | } 143 | 144 | Ok(()) 145 | })?; 146 | Ok(()) 147 | } 148 | 149 | #[cfg(test)] 150 | mod tests { 151 | use tempfile::{tempdir, TempDir}; 152 | 153 | use std::fs::File; 154 | 155 | use crate::builder::build_test_fs; 156 | use std::os::unix::fs::MetadataExt; 157 | use walkdir::WalkDir; 158 | 159 | use super::*; 160 | 161 | #[test] 162 | fn test_extracted_xattrs() { 163 | let dir = TempDir::new_in(".").unwrap(); 164 | let oci_dir = dir.path().join("oci"); 165 | let image = Image::new(&oci_dir).unwrap(); 166 | let rootfs = dir.path().join("rootfs"); 167 | let extract_dir = TempDir::new_in(".").unwrap(); 168 | 169 | let foo = rootfs.join("foo"); 170 | let bar = rootfs.join("bar"); 171 | 172 | let mut file_attributes = HashMap::>::new(); 173 | file_attributes.insert("user.meshuggah".to_string(), 
b"rocks".to_vec()); 174 | file_attributes.insert("user.nothing".to_string(), b"".to_vec()); 175 | 176 | // test directory, file types. we should probably also test "other" types, but on fifos and 177 | // symlinks on linux xattrs aren't allowed, so we just punt for now. maybe when 5.8 is more 178 | // prevalent, we can use mknod c 0 0? 179 | fs::create_dir_all(&foo).unwrap(); 180 | fs::write(&bar, b"bar").unwrap(); 181 | 182 | // set some xattrs 183 | for f in [&foo, &bar] { 184 | for (key, val) in &file_attributes { 185 | xattr::set(f, key, val).unwrap(); 186 | xattr::set(f, key, val).unwrap(); 187 | } 188 | } 189 | 190 | build_test_fs(&rootfs, &image, "test").unwrap(); 191 | 192 | extract_rootfs( 193 | oci_dir.to_str().unwrap(), 194 | "test", 195 | extract_dir.path().to_str().unwrap(), 196 | ) 197 | .unwrap(); 198 | 199 | let ents = WalkDir::new(&extract_dir) 200 | .contents_first(false) 201 | .follow_links(false) 202 | .same_file_system(true) 203 | .sort_by(|a, b| a.file_name().cmp(b.file_name())) 204 | .into_iter() 205 | .collect::, walkdir::Error>>() 206 | .unwrap(); 207 | 208 | // the first directory is extract_dir, we don't check xattrs for it 209 | for ent in ents.into_iter().skip(1) { 210 | for (key, val) in &file_attributes { 211 | let attribute = xattr::get(ent.path(), key); 212 | println!( 213 | "path: {:?} key: {:?} attribute: {:?}", 214 | ent.path(), 215 | key, 216 | attribute 217 | ); 218 | assert!(attribute.unwrap().as_ref().unwrap() == val); 219 | } 220 | } 221 | } 222 | 223 | #[test] 224 | fn test_permissions() { 225 | let dir = tempdir().unwrap(); 226 | let oci_dir = dir.path().join("oci"); 227 | let image = Image::new(&oci_dir).unwrap(); 228 | let rootfs = dir.path().join("rootfs"); 229 | let extract_dir = tempdir().unwrap(); 230 | const TESTED_PERMISSION: u32 = 0o7777; 231 | 232 | let foo = rootfs.join("foo"); 233 | 234 | fs::create_dir_all(&rootfs).unwrap(); 235 | fs::write(&foo, b"foo").unwrap(); 236 | 237 | std::fs::set_permissions(foo, Permissions::from_mode(TESTED_PERMISSION)).unwrap(); 238 | 239 | build_test_fs(&rootfs, &image, "test").unwrap(); 240 | 241 | extract_rootfs( 242 | oci_dir.to_str().unwrap(), 243 | "test", 244 | extract_dir.path().to_str().unwrap(), 245 | ) 246 | .unwrap(); 247 | 248 | let extracted_path = extract_dir.path().join("foo"); 249 | let f = File::open(extracted_path).unwrap(); 250 | let metadata = f.metadata().unwrap(); 251 | 252 | assert_eq!(metadata.permissions().mode() & 0xFFF, TESTED_PERMISSION); 253 | } 254 | 255 | #[test] 256 | fn test_hardlink_extraction() { 257 | let dir = tempdir().unwrap(); 258 | let oci_dir = dir.path().join("oci"); 259 | let image = Image::new(&oci_dir).unwrap(); 260 | let rootfs = dir.path().join("rootfs"); 261 | let extract_dir = tempdir().unwrap(); 262 | 263 | let foo = rootfs.join("foo"); 264 | let bar = rootfs.join("bar"); 265 | 266 | fs::create_dir_all(&rootfs).unwrap(); 267 | fs::write(&foo, b"foo").unwrap(); 268 | 269 | fs::hard_link(&foo, &bar).unwrap(); 270 | 271 | assert_eq!( 272 | fs::metadata(&foo).unwrap().ino(), 273 | fs::metadata(&bar).unwrap().ino() 274 | ); 275 | 276 | build_test_fs(&rootfs, &image, "test").unwrap(); 277 | 278 | extract_rootfs( 279 | oci_dir.to_str().unwrap(), 280 | "test", 281 | extract_dir.path().to_str().unwrap(), 282 | ) 283 | .unwrap(); 284 | 285 | let foo = extract_dir.path().join("foo"); 286 | let bar = extract_dir.path().join("bar"); 287 | 288 | assert_eq!( 289 | fs::metadata(foo).unwrap().ino(), 290 | fs::metadata(bar).unwrap().ino() 291 | ); 292 | } 293 | 294 | 
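    // Illustrative sketch (not part of the original test suite): a symlink
    // round-trip, assuming the builder records symlinks the same way the
    // InodeMode::Lnk arm above restores them. The shape mirrors
    // test_hardlink_extraction.
    #[test]
    fn test_symlink_extraction_sketch() {
        let dir = tempdir().unwrap();
        let oci_dir = dir.path().join("oci");
        let image = Image::new(&oci_dir).unwrap();
        let rootfs = dir.path().join("rootfs");
        let extract_dir = tempdir().unwrap();

        fs::create_dir_all(&rootfs).unwrap();
        fs::write(rootfs.join("foo"), b"foo").unwrap();
        // relative target, like a typical distro rootfs symlink
        std::os::unix::fs::symlink("foo", rootfs.join("bar")).unwrap();

        build_test_fs(&rootfs, &image, "test").unwrap();
        extract_rootfs(
            oci_dir.to_str().unwrap(),
            "test",
            extract_dir.path().to_str().unwrap(),
        )
        .unwrap();

        let bar = extract_dir.path().join("bar");
        assert!(fs::symlink_metadata(&bar).unwrap().file_type().is_symlink());
        assert_eq!(fs::read_link(&bar).unwrap(), PathBuf::from("foo"));
    }
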
#[test] 295 | fn test_empty_file() { 296 | let dir = tempdir().unwrap(); 297 | let oci_dir = dir.path().join("oci"); 298 | let image = Image::new(&oci_dir).unwrap(); 299 | let rootfs = dir.path().join("rootfs"); 300 | let foo = rootfs.join("foo"); 301 | let extract_dir = tempdir().unwrap(); 302 | 303 | fs::create_dir_all(&rootfs).unwrap(); 304 | std::fs::File::create(foo).unwrap(); 305 | 306 | build_test_fs(&rootfs, &image, "test").unwrap(); 307 | 308 | extract_rootfs( 309 | oci_dir.to_str().unwrap(), 310 | "test", 311 | extract_dir.path().to_str().unwrap(), 312 | ) 313 | .unwrap(); 314 | let extracted_foo = extract_dir.path().join("foo"); 315 | assert_eq!(extracted_foo.metadata().unwrap().len(), 0); 316 | } 317 | } 318 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/format.rs: -------------------------------------------------------------------------------- 1 | mod types; 2 | pub use types::*; 3 | 4 | mod error; 5 | pub use error::*; 6 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/format/error.rs: -------------------------------------------------------------------------------- 1 | use std::backtrace::Backtrace; 2 | use std::io; 3 | use std::os::raw::c_int; 4 | 5 | use nix::errno::Errno; 6 | use thiserror::Error; 7 | 8 | #[derive(Error, Debug)] 9 | pub enum WireFormatError { 10 | #[error("cannot turn local ref into a digest")] 11 | LocalRefError(Backtrace), 12 | #[error("cannot seek to other blob")] 13 | SeekOtherError(Backtrace), 14 | #[error("invalid serialized data")] 15 | InvalidSerializedData(Backtrace), 16 | #[error("invalid image schema: {0}")] 17 | InvalidImageSchema(i32, Backtrace), 18 | #[error("invalid image version: {0}")] 19 | InvalidImageVersion(String, Backtrace), 20 | #[error("invalid fs_verity data: {0}")] 21 | InvalidFsVerityData(String, Backtrace), 22 | #[error("missing manifest: {0}")] 23 | MissingManifest(String, Backtrace), 24 | #[error("missing PuzzleFS rootfs")] 25 | MissingRootfs(Backtrace), 26 | #[error("fs error: {0}")] 27 | IOError(#[from] io::Error, Backtrace), 28 | #[error("deserialization error (capnp): {0}")] 29 | CapnpError(#[from] capnp::Error, Backtrace), 30 | #[error("numeric conversion error: {0}")] 31 | FromIntError(#[from] std::num::TryFromIntError, Backtrace), 32 | #[error("deserialization error (json): {0}")] 33 | JSONError(#[from] serde_json::Error, Backtrace), 34 | #[error("TryFromSlice error: {0}")] 35 | FromSliceError(#[from] std::array::TryFromSliceError, Backtrace), 36 | #[error("hex error: {0}")] 37 | HexError(#[from] hex::FromHexError, Backtrace), 38 | #[error("Oci error: {0}")] 39 | OciError(#[from] ocidir::oci_spec::OciSpecError, Backtrace), 40 | #[error("Oci dir error: {0}")] 41 | OciDirError(#[from] ocidir::Error, Backtrace), 42 | } 43 | 44 | impl WireFormatError { 45 | pub fn to_errno(&self) -> c_int { 46 | match self { 47 | WireFormatError::LocalRefError(..) => Errno::EINVAL as c_int, 48 | WireFormatError::SeekOtherError(..) => Errno::ESPIPE as c_int, 49 | WireFormatError::InvalidSerializedData(..) => Errno::EINVAL as c_int, 50 | WireFormatError::InvalidImageSchema(..) => Errno::EINVAL as c_int, 51 | WireFormatError::InvalidImageVersion(..) => Errno::EINVAL as c_int, 52 | WireFormatError::InvalidFsVerityData(..) => Errno::EINVAL as c_int, 53 | WireFormatError::MissingManifest(..) => Errno::EINVAL as c_int, 54 | WireFormatError::MissingRootfs(..) => Errno::EINVAL as c_int, 55 | WireFormatError::IOError(ioe, ..) 
=> { 56 | ioe.raw_os_error().unwrap_or(Errno::EINVAL as i32) as c_int 57 | } 58 | WireFormatError::CapnpError(..) => Errno::EINVAL as c_int, 59 | WireFormatError::JSONError(..) => Errno::EINVAL as c_int, 60 | WireFormatError::HexError(..) => Errno::EINVAL as c_int, 61 | WireFormatError::FromIntError(..) => Errno::EINVAL as c_int, 62 | WireFormatError::FromSliceError(..) => Errno::EINVAL as c_int, 63 | WireFormatError::OciError(..) => Errno::EINVAL as c_int, 64 | WireFormatError::OciDirError(..) => Errno::EINVAL as c_int, 65 | } 66 | } 67 | 68 | pub fn from_errno(errno: Errno) -> Self { 69 | Self::IOError( 70 | io::Error::from_raw_os_error(errno as i32), 71 | Backtrace::capture(), 72 | ) 73 | } 74 | } 75 | 76 | pub type Result = std::result::Result; 77 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/format/metadata.capnp: -------------------------------------------------------------------------------- 1 | @0x84ae5e6e88b7cbb7; 2 | 3 | struct Chr { 4 | major@0: UInt64; 5 | minor@1: UInt64; 6 | } 7 | 8 | struct DirEntry { 9 | ino@0: UInt64; 10 | name@1: Data; 11 | } 12 | 13 | struct Dir { 14 | entries@0: List(DirEntry); 15 | lookBelow@1: Bool; 16 | } 17 | 18 | struct Blk { 19 | major@0: UInt64; 20 | minor@1: UInt64; 21 | } 22 | 23 | struct FileChunk { 24 | blob@0: BlobRef; 25 | len@1: UInt64; 26 | } 27 | 28 | struct BlobRef { 29 | digest@0: Data; 30 | offset@1: UInt64; 31 | compressed@2: Bool; 32 | } 33 | 34 | struct Xattr { 35 | key@0: Data; 36 | val@1: Data; 37 | } 38 | 39 | struct InodeAdditional { 40 | xattrs@0: List(Xattr); 41 | symlinkTarget@1: Data; 42 | } 43 | 44 | struct Inode { 45 | ino@0: UInt64; 46 | mode: union { 47 | unknown@1: Void; 48 | fifo@2: Void; 49 | chr@3: Chr; 50 | dir@4: Dir; 51 | blk@5: Blk; 52 | file@6: List(FileChunk); 53 | lnk@7: Void; 54 | sock@8: Void; 55 | wht@9: Void; 56 | } 57 | uid@10: UInt32; 58 | gid@11: UInt32; 59 | permissions@12: UInt16; 60 | additional@13: InodeAdditional; 61 | } 62 | 63 | struct InodeVector { 64 | inodes@0: List(Inode); 65 | } 66 | 67 | struct VerityData { 68 | digest@0: Data; 69 | verity@1: Data; 70 | } 71 | 72 | struct Rootfs { 73 | metadatas@0: List(InodeVector); 74 | fsVerityData@1: List(VerityData); 75 | manifestVersion@2: UInt64; 76 | } 77 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/fsverity_helpers.rs: -------------------------------------------------------------------------------- 1 | use crate::format::{Result, WireFormatError, SHA256_BLOCK_SIZE}; 2 | use std::backtrace::Backtrace; 3 | use std::io::Write; 4 | use std::os::unix::io::AsRawFd; 5 | 6 | pub use fs_verity::linux::fsverity_enable; 7 | use fs_verity::linux::fsverity_measure; 8 | use fs_verity::FsVeritySha256; 9 | pub use fs_verity::InnerHashAlgorithm; 10 | use sha2::Digest; 11 | 12 | pub const FS_VERITY_BLOCK_SIZE_DEFAULT: usize = 4096; 13 | 14 | pub fn get_fs_verity_digest(data: &[u8]) -> Result<[u8; SHA256_BLOCK_SIZE]> { 15 | let mut digest = FsVeritySha256::new(); 16 | digest.write_all(data)?; 17 | let result = digest.finalize(); 18 | Ok(result.into()) 19 | } 20 | 21 | pub fn check_fs_verity(file: &cap_std::fs::File, expected: &[u8]) -> Result<()> { 22 | if expected.len() != SHA256_BLOCK_SIZE { 23 | return Err(WireFormatError::InvalidFsVerityData( 24 | format!( 25 | "fsverity invalid SHA256 hash length {}", 26 | hex::encode(expected), 27 | ), 28 | Backtrace::capture(), 29 | )); 30 | } 31 | let (_, measurement) = fsverity_measure(file.as_raw_fd())?; 32 | 33 | 
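// Illustrative note (not in the original source): the expected digest normally
// comes from get_fs_verity_digest() run over the blob's bytes at build time,
// while fsverity_measure() asks the kernel what the opened file actually
// hashes to, so a blob tampered with after `fsverity enable` fails the
// comparison below instead of being silently used. Hypothetical call site:
//
//     let file = image.0.blobs_dir().open("<digest>")?; // cap_std file handle
//     check_fs_verity(&file, &expected[..])?;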
if *expected != measurement[..] { 34 | return Err(WireFormatError::InvalidFsVerityData( 35 | format!( 36 | "fsverity mismatch {}, expected {}", 37 | hex::encode(expected), 38 | hex::encode(measurement) 39 | ), 40 | Backtrace::capture(), 41 | )); 42 | } 43 | 44 | Ok(()) 45 | } 46 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(error_generic_member_access)] 2 | #![feature(seek_stream_len)] 3 | #[macro_use] 4 | extern crate anyhow; 5 | 6 | pub mod builder; 7 | mod common; 8 | pub mod compression; 9 | pub mod extractor; 10 | mod format; 11 | pub mod fsverity_helpers; 12 | pub mod oci; 13 | pub mod reader; 14 | 15 | #[allow(clippy::needless_lifetimes)] 16 | pub mod metadata_capnp { 17 | include!(concat!(env!("OUT_DIR"), "/metadata_capnp.rs")); 18 | } 19 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/oci.rs: -------------------------------------------------------------------------------- 1 | use crate::fsverity_helpers::{check_fs_verity, get_fs_verity_digest}; 2 | use std::any::Any; 3 | use std::backtrace::Backtrace; 4 | use std::fs; 5 | use std::io; 6 | use std::io::{Read, Seek}; 7 | use std::path::{Path, PathBuf}; 8 | 9 | use sha2::{Digest as Sha2Digest, Sha256}; 10 | 11 | use crate::compression::{Compression, Decompressor, Noop, Zstd}; 12 | use crate::format::{Result, RootfsReader, VerityData, WireFormatError, SHA256_BLOCK_SIZE}; 13 | use std::io::{Error, ErrorKind}; 14 | 15 | pub use crate::format::Digest; 16 | use crate::oci::media_types::{PuzzleFSMediaType, PUZZLEFS_ROOTFS, VERITY_ROOT_HASH_ANNOTATION}; 17 | use ocidir::oci_spec::image; 18 | pub use ocidir::oci_spec::image::Descriptor; 19 | use ocidir::oci_spec::image::{ImageIndex, ImageManifest, MediaType}; 20 | use ocidir::OciDir; 21 | use std::collections::HashMap; 22 | use std::str::FromStr; 23 | 24 | use std::io::Cursor; 25 | 26 | pub mod media_types; 27 | 28 | pub struct Image(pub OciDir); 29 | 30 | impl Image { 31 | pub fn new(oci_dir: &Path) -> Result { 32 | fs::create_dir_all(oci_dir)?; 33 | let d = cap_std::fs::Dir::open_ambient_dir(oci_dir, cap_std::ambient_authority())?; 34 | let oci_dir = OciDir::ensure(d)?; 35 | 36 | Ok(Self(oci_dir)) 37 | } 38 | 39 | pub fn open(oci_dir: &Path) -> Result { 40 | let d = cap_std::fs::Dir::open_ambient_dir(oci_dir, cap_std::ambient_authority())?; 41 | let blobs_dir = cap_std::fs::Dir::open_ambient_dir( 42 | oci_dir.join(Self::blob_path()), 43 | cap_std::ambient_authority(), 44 | )?; 45 | let oci_dir = OciDir::open_with_external_blobs(d, blobs_dir)?; 46 | Ok(Self(oci_dir)) 47 | } 48 | 49 | pub fn blob_path() -> PathBuf { 50 | // TODO: use BLOBDIR constant from ocidir after making it public 51 | PathBuf::from("blobs/sha256") 52 | } 53 | 54 | pub fn put_blob( 55 | &self, 56 | buf: &[u8], 57 | image_manifest: &mut ImageManifest, 58 | media_type: impl PuzzleFSMediaType, 59 | ) -> Result<(Descriptor, [u8; SHA256_BLOCK_SIZE], bool)> { 60 | let mut compressed_data = Cursor::new(Vec::::new()); 61 | let mut compressed = C::compress(&mut compressed_data)?; 62 | let mut hasher = Sha256::new(); 63 | // generics may not be the best way to implement compression, alternatives: 64 | // trait objects, but they add runtime overhead 65 | // an enum together with enum_dispatch 66 | let mut compressed_blob = std::any::TypeId::of::() != std::any::TypeId::of::(); 67 | 68 | // without the clone, the io::copy 
leaves us with an empty slice 69 | // we're only cloning the reference, which is ok because the slice itself gets mutated 70 | // i.e. the slice advances through the buffer as it is being read 71 | let uncompressed_size = io::copy(&mut <&[u8]>::clone(&buf), &mut compressed)?; 72 | compressed.end()?; 73 | let compressed_size = compressed_data.get_ref().len() as u64; 74 | let final_size = std::cmp::min(compressed_size, uncompressed_size); 75 | 76 | // store the uncompressed blob if the compressed version has bigger size 77 | let final_data = if compressed_blob && compressed_size >= uncompressed_size { 78 | compressed_blob = false; 79 | buf 80 | } else { 81 | compressed_data.get_ref() 82 | }; 83 | 84 | hasher.update(final_data); 85 | let digest = hasher.finalize(); 86 | let media_type_with_extension = C::append_extension(media_type.name()); 87 | let mut digest_string = "sha256:".to_string(); 88 | digest_string.push_str(&hex::encode(digest.as_slice())); 89 | 90 | let fs_verity_digest = get_fs_verity_digest(&compressed_data.get_ref()[..])?; 91 | let mut descriptor = Descriptor::new( 92 | MediaType::Other(media_type_with_extension), 93 | final_size, 94 | image::Digest::from_str(&digest_string)?, 95 | ); 96 | // We need to store the PuzzleFS Rootfs verity digest as an annotation (obviously we cannot 97 | // store it in the Rootfs itself) 98 | if media_type.name() == PUZZLEFS_ROOTFS { 99 | let mut annotations = HashMap::new(); 100 | annotations.insert( 101 | VERITY_ROOT_HASH_ANNOTATION.to_string(), 102 | hex::encode(fs_verity_digest), 103 | ); 104 | descriptor.set_annotations(Some(annotations)); 105 | } 106 | let path = Self::blob_path().join(descriptor.digest().digest()); 107 | 108 | // avoid replacing the data blob so we don't drop fsverity data 109 | if self.0.dir().exists(&path) { 110 | let mut hasher = Sha256::new(); 111 | let mut file = self.0.dir().open(&path)?; 112 | io::copy(&mut file, &mut hasher)?; 113 | let existing_digest = hasher.finalize(); 114 | if existing_digest != digest { 115 | return Err(Error::new( 116 | ErrorKind::AlreadyExists, 117 | format!("blob already exists and it's not content addressable existing digest {}, new digest {}", 118 | hex::encode(existing_digest), hex::encode(digest)) 119 | ) 120 | .into()); 121 | } 122 | } else { 123 | self.0.dir().write(&path, final_data)?; 124 | } 125 | 126 | // Let's make the PuzzleFS image rootfs the first layer so it's easy to find 127 | // The LXC oci template also looks at the first layer in the array to identify the image 128 | // type (see getlayermediatype): 129 | // https://github.com/lxc/lxc/commit/1a2da75b6e8431f3530ebd3f75442d3bd5eec5e2 130 | if media_type.name() == PUZZLEFS_ROOTFS { 131 | image_manifest.layers_mut().insert(0, descriptor.clone()); 132 | } else { 133 | image_manifest.layers_mut().push(descriptor.clone()); 134 | } 135 | Ok((descriptor, fs_verity_digest, compressed_blob)) 136 | } 137 | 138 | fn open_raw_blob(&self, digest: &str, verity: Option<&[u8]>) -> io::Result { 139 | let file = self.0.blobs_dir().open(digest)?; 140 | if let Some(verity) = verity { 141 | check_fs_verity(&file, verity).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 142 | } 143 | Ok(file) 144 | } 145 | 146 | pub fn open_compressed_blob( 147 | &self, 148 | digest: &Digest, 149 | verity: Option<&[u8]>, 150 | ) -> io::Result> { 151 | let f = self.open_raw_blob(&digest.to_string(), verity)?; 152 | C::decompress(f) 153 | } 154 | 155 | pub fn get_pfs_rootfs_verity(&self, tag: &str) -> Result<[u8; SHA256_BLOCK_SIZE]> { 156 | let manifest = 
self.0.find_manifest_with_tag(tag)?.ok_or_else(|| { 157 | WireFormatError::MissingManifest(tag.to_string(), Backtrace::capture()) 158 | })?; 159 | 160 | let rootfs_desc = manifest 161 | .layers() 162 | .iter() 163 | .find(|desc| desc.media_type() == &MediaType::Other(PUZZLEFS_ROOTFS.to_string())) 164 | .ok_or_else(|| WireFormatError::MissingRootfs(Backtrace::capture()))?; 165 | 166 | let rootfs_verity = rootfs_desc 167 | .annotations() 168 | .as_ref() 169 | .ok_or_else(|| { 170 | WireFormatError::InvalidFsVerityData( 171 | "missing rootfs annotations".to_string(), 172 | Backtrace::capture(), 173 | ) 174 | })? 175 | .get(VERITY_ROOT_HASH_ANNOTATION) 176 | .ok_or_else(|| { 177 | WireFormatError::InvalidFsVerityData( 178 | "missing rootfs verity annotation".to_string(), 179 | Backtrace::capture(), 180 | ) 181 | })?; 182 | let mut verity_digest: [u8; SHA256_BLOCK_SIZE] = [0; SHA256_BLOCK_SIZE]; 183 | hex::decode_to_slice(rootfs_verity, &mut verity_digest)?; 184 | 185 | Ok(verity_digest) 186 | } 187 | 188 | pub fn get_pfs_rootfs(&self, tag: &str, verity: Option<&[u8]>) -> Result { 189 | let manifest = self.0.find_manifest_with_tag(tag)?.ok_or_else(|| { 190 | WireFormatError::MissingManifest(tag.to_string(), Backtrace::capture()) 191 | })?; 192 | 193 | let rootfs_desc = manifest 194 | .layers() 195 | .iter() 196 | .find(|desc| desc.media_type() == &MediaType::Other(PUZZLEFS_ROOTFS.to_string())) 197 | .ok_or_else(|| WireFormatError::MissingRootfs(Backtrace::capture()))?; 198 | 199 | let rootfs_digest = rootfs_desc.digest().digest(); 200 | let file = self.open_raw_blob(rootfs_digest, verity)?; 201 | Ok(file) 202 | } 203 | 204 | pub fn get_image_manifest_fd(&self, tag: &str) -> Result { 205 | let image_manifest = self 206 | .0 207 | .find_manifest_descriptor_with_tag(tag)? 208 | .ok_or_else(|| { 209 | WireFormatError::MissingManifest(tag.to_string(), Backtrace::capture()) 210 | })?; 211 | let file = self.open_raw_blob(image_manifest.digest().digest(), None)?; 212 | Ok(file) 213 | } 214 | 215 | pub fn open_rootfs_blob(&self, tag: &str, verity: Option<&[u8]>) -> Result { 216 | let temp_verity; 217 | let rootfs_verity = if let Some(verity) = verity { 218 | let manifest = self.get_image_manifest_fd(tag)?; 219 | check_fs_verity(&manifest, verity)?; 220 | temp_verity = self.get_pfs_rootfs_verity(tag)?; 221 | Some(&temp_verity[..]) 222 | } else { 223 | None 224 | }; 225 | 226 | let rootfs_file = self.get_pfs_rootfs(tag, rootfs_verity)?; 227 | RootfsReader::open(rootfs_file) 228 | } 229 | 230 | pub fn fill_from_chunk( 231 | &self, 232 | chunk: crate::format::BlobRef, 233 | addl_offset: u64, 234 | buf: &mut [u8], 235 | verity_data: &Option, 236 | ) -> crate::format::Result { 237 | let digest = &::try_from(chunk)?; 238 | let file_verity; 239 | if let Some(verity) = verity_data { 240 | file_verity = Some( 241 | &verity 242 | .get(&digest.underlying()) 243 | .ok_or(WireFormatError::InvalidFsVerityData( 244 | format!("missing verity data {digest}"), 245 | Backtrace::capture(), 246 | ))?[..], 247 | ); 248 | } else { 249 | file_verity = None; 250 | } 251 | let mut blob = if chunk.compressed { 252 | self.open_compressed_blob::(digest, file_verity)? 253 | } else { 254 | self.open_compressed_blob::(digest, file_verity)? 255 | }; 256 | blob.seek(io::SeekFrom::Start(chunk.offset + addl_offset))?; 257 | let n = blob.read(buf)?; 258 | Ok(n) 259 | } 260 | 261 | pub fn get_index(&self) -> Result { 262 | Ok(self.0.read_index()?) 
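// Illustrative note (not in the original source): fill_from_chunk above is the
// whole data path for a file read. For a chunk { digest D, offset 1000,
// compressed: true } and addl_offset 24, it opens blob D through the Zstd
// decompressor (after checking D's fs-verity digest when verity_data is
// present), seeks to uncompressed offset 1024 and reads into `buf`, returning
// however many bytes that single chunk could supply.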
263 | } 264 | 265 | pub fn get_empty_manifest(&self) -> Result { 266 | Ok(self.0.new_empty_manifest()?.build()?) 267 | } 268 | } 269 | 270 | #[cfg(test)] 271 | mod tests { 272 | use super::*; 273 | use ocidir::oci_spec::image::{ImageIndexBuilder, Platform, ANNOTATION_REF_NAME}; 274 | use std::collections::HashMap; 275 | use tempfile::tempdir; 276 | type DefaultCompression = Zstd; 277 | 278 | #[test] 279 | fn test_put_blob_correct_hash() -> anyhow::Result<()> { 280 | let dir = tempdir()?; 281 | let image: Image = Image::new(dir.path())?; 282 | let mut image_manifest = image.get_empty_manifest()?; 283 | let (desc, ..) = image.put_blob::( 284 | "meshuggah rocks".as_bytes(), 285 | &mut image_manifest, 286 | media_types::Chunk {}, 287 | )?; 288 | 289 | const DIGEST: &str = "3abd5ce0f91f640d88dca1f26b37037b02415927cacec9626d87668a715ec12d"; 290 | assert_eq!(desc.digest().digest(), DIGEST); 291 | 292 | let md = image 293 | .0 294 | .dir() 295 | .symlink_metadata(Image::blob_path().join(DIGEST))?; 296 | assert!(md.is_file()); 297 | Ok(()) 298 | } 299 | 300 | #[test] 301 | fn test_open_can_open_new_image() -> anyhow::Result<()> { 302 | let dir = tempdir()?; 303 | Image::new(dir.path())?; 304 | Image::open(dir.path())?; 305 | Ok(()) 306 | } 307 | 308 | #[test] 309 | fn test_put_get_index() -> anyhow::Result<()> { 310 | let dir = tempdir()?; 311 | let image = Image::new(dir.path())?; 312 | let mut image_manifest = image.get_empty_manifest()?; 313 | let mut annotations = HashMap::new(); 314 | annotations.insert(ANNOTATION_REF_NAME.to_string(), "foo".to_string()); 315 | image_manifest.set_annotations(Some(annotations)); 316 | let image_manifest_descriptor = 317 | image 318 | .0 319 | .insert_manifest(image_manifest, None, Platform::default())?; 320 | 321 | let index = ImageIndexBuilder::default() 322 | .schema_version(2_u32) 323 | .manifests(vec![image_manifest_descriptor]) 324 | .build()?; 325 | 326 | let image2 = Image::open(dir.path())?; 327 | let index2 = image2.get_index()?; 328 | assert_eq!(index.manifests(), index2.manifests()); 329 | Ok(()) 330 | } 331 | 332 | #[test] 333 | fn double_put_ok() -> anyhow::Result<()> { 334 | let dir = tempdir()?; 335 | let image = Image::new(dir.path())?; 336 | let mut image_manifest = image.get_empty_manifest()?; 337 | let desc1 = image.put_blob::( 338 | "meshuggah rocks".as_bytes(), 339 | &mut image_manifest, 340 | media_types::Chunk {}, 341 | )?; 342 | let desc2 = image.put_blob::( 343 | "meshuggah rocks".as_bytes(), 344 | &mut image_manifest, 345 | media_types::Chunk {}, 346 | )?; 347 | assert_eq!(desc1, desc2); 348 | Ok(()) 349 | } 350 | } 351 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/oci/media_types.rs: -------------------------------------------------------------------------------- 1 | pub trait PuzzleFSMediaType { 2 | fn name(&self) -> &'static str; 3 | } 4 | 5 | pub(crate) const PUZZLEFS_ROOTFS: &str = "application/vnd.puzzlefs.image.rootfs.v1"; 6 | 7 | pub struct Rootfs {} 8 | 9 | impl PuzzleFSMediaType for Rootfs { 10 | fn name(&self) -> &'static str { 11 | PUZZLEFS_ROOTFS 12 | } 13 | } 14 | 15 | pub(crate) const PUZZLEFS_CHUNK_DATA: &str = "application/vnd.puzzlefs.image.filedata.v1"; 16 | 17 | pub struct Chunk {} 18 | 19 | impl PuzzleFSMediaType for Chunk { 20 | fn name(&self) -> &'static str { 21 | PUZZLEFS_CHUNK_DATA 22 | } 23 | } 24 | 25 | pub(crate) const VERITY_ROOT_HASH_ANNOTATION: &str = 26 | "io.puzzlefsoci.puzzlefs.puzzlefs_verity_root_hash"; 27 | 
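// Illustrative sketch (not part of the original file): the names above are
// combined with Compression::append_extension when blobs are written, so a
// zstd-compressed chunk blob ends up labelled
// "application/vnd.puzzlefs.image.filedata.v1+zstd".
#[cfg(test)]
mod media_type_suffix_tests {
    use super::*;
    use crate::compression::{Compression, Zstd};

    #[test]
    fn chunk_media_type_gets_compression_suffix() {
        assert_eq!(
            Zstd::append_extension(Chunk {}.name()),
            format!("{}+zstd", PUZZLEFS_CHUNK_DATA)
        );
    }
}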
-------------------------------------------------------------------------------- /puzzlefs-lib/src/reader.rs: -------------------------------------------------------------------------------- 1 | extern crate fuser as fuse_ffi; 2 | 3 | use std::path::Path; 4 | 5 | use crate::format::Result; 6 | use crate::oci::Image; 7 | 8 | mod puzzlefs; 9 | pub use puzzlefs::PuzzleFS; 10 | pub use puzzlefs::PUZZLEFS_IMAGE_MANIFEST_VERSION; 11 | 12 | pub mod fuse; 13 | pub use fuse::Fuse; 14 | 15 | mod walk; 16 | use fuse::PipeDescriptor; 17 | pub use walk::WalkPuzzleFS; 18 | 19 | // copied from the fuser function 'MountOption::from_str' because it's not exported 20 | fn mount_option_from_str(s: &str) -> fuse_ffi::MountOption { 21 | match s { 22 | "auto_unmount" => fuse_ffi::MountOption::AutoUnmount, 23 | "allow_other" => fuse_ffi::MountOption::AllowOther, 24 | "allow_root" => fuse_ffi::MountOption::AllowRoot, 25 | "default_permissions" => fuse_ffi::MountOption::DefaultPermissions, 26 | "dev" => fuse_ffi::MountOption::Dev, 27 | "nodev" => fuse_ffi::MountOption::NoDev, 28 | "suid" => fuse_ffi::MountOption::Suid, 29 | "nosuid" => fuse_ffi::MountOption::NoSuid, 30 | "ro" => fuse_ffi::MountOption::RO, 31 | "rw" => fuse_ffi::MountOption::RW, 32 | "exec" => fuse_ffi::MountOption::Exec, 33 | "noexec" => fuse_ffi::MountOption::NoExec, 34 | "atime" => fuse_ffi::MountOption::Atime, 35 | "noatime" => fuse_ffi::MountOption::NoAtime, 36 | "dirsync" => fuse_ffi::MountOption::DirSync, 37 | "sync" => fuse_ffi::MountOption::Sync, 38 | "async" => fuse_ffi::MountOption::Async, 39 | x if x.starts_with("fsname=") => fuse_ffi::MountOption::FSName(x[7..].into()), 40 | x if x.starts_with("subtype=") => fuse_ffi::MountOption::Subtype(x[8..].into()), 41 | x => fuse_ffi::MountOption::CUSTOM(x.into()), 42 | } 43 | } 44 | 45 | pub fn mount>( 46 | image: Image, 47 | tag: &str, 48 | mountpoint: &Path, 49 | options: &[T], 50 | init_notify: Option, 51 | manifest_verity: Option<&[u8]>, 52 | ) -> Result<()> { 53 | let pfs = PuzzleFS::open(image, tag, manifest_verity)?; 54 | let fuse = Fuse::new(pfs, None, init_notify); 55 | fuse_ffi::mount2( 56 | fuse, 57 | mountpoint, 58 | &options 59 | .iter() 60 | .map(|option| mount_option_from_str(option.as_ref())) 61 | .collect::>(), 62 | )?; 63 | Ok(()) 64 | } 65 | 66 | pub fn spawn_mount>( 67 | image: Image, 68 | tag: &str, 69 | mountpoint: &Path, 70 | options: &[T], 71 | init_notify: Option, 72 | sender: Option>, 73 | manifest_verity: Option<&[u8]>, 74 | ) -> Result { 75 | let pfs = PuzzleFS::open(image, tag, manifest_verity)?; 76 | let fuse = Fuse::new(pfs, sender, init_notify); 77 | Ok(fuse_ffi::spawn_mount2( 78 | fuse, 79 | mountpoint, 80 | &options 81 | .iter() 82 | .map(|option| mount_option_from_str(option.as_ref())) 83 | .collect::>(), 84 | )?) 
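// Illustrative usage (comment added for clarity, not in the original source),
// mirroring the call in the fuse tests: mount an existing layout read-only in
// the background and unmount by dropping the returned session handle.
//
//     let image = Image::open(Path::new("oci"))?;
//     let handle = spawn_mount::<&str>(image, "test", Path::new("/mnt/pfs"),
//                                      &["ro"], None, None, None)?;
//     // ... read files under /mnt/pfs ...
//     drop(handle); // unmounts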
85 | } 86 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/reader/fuse.rs: -------------------------------------------------------------------------------- 1 | use log::{debug, warn}; 2 | use os_pipe::PipeWriter; 3 | use std::ffi::CString; 4 | use std::ffi::OsStr; 5 | use std::ffi::OsString; 6 | use std::fs; 7 | use std::fs::OpenOptions; 8 | use std::io::Write; 9 | use std::os::raw::c_int; 10 | use std::os::unix::ffi::OsStrExt; 11 | use std::os::unix::ffi::OsStringExt; 12 | use std::os::unix::fs::FileTypeExt; 13 | use std::path::{Path, PathBuf}; 14 | use std::thread; 15 | 16 | use fuser::{ 17 | FileAttr, FileType, Filesystem, KernelConfig, ReplyData, ReplyEntry, ReplyOpen, Request, 18 | TimeOrNow, 19 | }; 20 | use nix::errno::Errno; 21 | use nix::fcntl::OFlag; 22 | use std::time::{Duration, SystemTime}; 23 | 24 | use crate::format::{DirEnt, Inode, InodeMode, Result, WireFormatError}; 25 | 26 | use super::puzzlefs::{file_read, PuzzleFS}; 27 | 28 | pub enum PipeDescriptor { 29 | UnnamedPipe(PipeWriter), 30 | NamedPipe(PathBuf), 31 | } 32 | 33 | pub struct Fuse { 34 | pfs: PuzzleFS, 35 | sender: Option>, 36 | init_notify: Option, 37 | // TODO: LRU cache inodes or something. I had problems fiddling with the borrow checker for the 38 | // cache, so for now we just do each lookup every time. 39 | } 40 | 41 | fn mode_to_fuse_type(inode: &Inode) -> Result { 42 | Ok(match inode.mode { 43 | InodeMode::File { .. } => FileType::RegularFile, 44 | InodeMode::Dir { .. } => FileType::Directory, 45 | InodeMode::Fifo { .. } => FileType::NamedPipe, 46 | InodeMode::Chr { .. } => FileType::CharDevice, 47 | InodeMode::Blk { .. } => FileType::BlockDevice, 48 | InodeMode::Lnk { .. } => FileType::Symlink, 49 | InodeMode::Sock { .. 
} => FileType::Socket, 50 | _ => return Err(WireFormatError::from_errno(Errno::EINVAL)), 51 | }) 52 | } 53 | 54 | impl Fuse { 55 | pub fn new( 56 | pfs: PuzzleFS, 57 | sender: Option>, 58 | init_notify: Option, 59 | ) -> Fuse { 60 | Fuse { 61 | pfs, 62 | sender, 63 | init_notify, 64 | } 65 | } 66 | 67 | fn _lookup(&mut self, parent: u64, name: &OsStr) -> Result { 68 | let dir = self.pfs.find_inode(parent)?; 69 | let ino = dir.dir_lookup(name.as_bytes())?; 70 | self._getattr(ino) 71 | } 72 | 73 | fn _getattr(&mut self, ino: u64) -> Result { 74 | let ic = self.pfs.find_inode(ino)?; 75 | let kind = mode_to_fuse_type(&ic)?; 76 | let len = ic.file_len().unwrap_or(0); 77 | Ok(FileAttr { 78 | ino: ic.ino, 79 | size: len, 80 | blocks: 0, 81 | atime: SystemTime::UNIX_EPOCH, 82 | mtime: SystemTime::UNIX_EPOCH, 83 | ctime: SystemTime::UNIX_EPOCH, 84 | crtime: SystemTime::UNIX_EPOCH, 85 | kind, 86 | perm: ic.permissions, 87 | nlink: 0, 88 | uid: ic.uid, 89 | gid: ic.gid, 90 | rdev: 0, 91 | blksize: 0, 92 | flags: 0, 93 | }) 94 | } 95 | 96 | fn _open(&self, flags_i: i32, reply: ReplyOpen) { 97 | let allowed_flags = OFlag::O_RDONLY 98 | | OFlag::O_PATH 99 | | OFlag::O_NONBLOCK 100 | | OFlag::O_DIRECTORY 101 | | OFlag::O_NOFOLLOW 102 | | OFlag::O_NOATIME; 103 | let flags = OFlag::from_bits_truncate(flags_i); 104 | if !allowed_flags.contains(flags) { 105 | warn!("invalid flags {flags:?}, only allowed {allowed_flags:?}"); 106 | reply.error(Errno::EROFS as i32) 107 | } else { 108 | // stateless open for now, slower maybe 109 | reply.opened(0, flags_i.try_into().unwrap()); 110 | } 111 | } 112 | 113 | fn _read(&mut self, ino: u64, offset: u64, size: u32) -> Result> { 114 | let inode = self.pfs.find_inode(ino)?; 115 | let mut buf = vec![0_u8; size as usize]; 116 | let read = file_read( 117 | &self.pfs.oci, 118 | &inode, 119 | offset as usize, 120 | &mut buf, 121 | &self.pfs.verity_data, 122 | )?; 123 | buf.truncate(read); 124 | Ok(buf) 125 | } 126 | 127 | fn _readdir(&mut self, ino: u64, offset: i64, reply: &mut fuser::ReplyDirectory) -> Result<()> { 128 | let inode = self.pfs.find_inode(ino)?; 129 | let entries = inode.dir_entries()?; 130 | for (index, DirEnt { name, ino: ino_r }) in entries.iter().enumerate().skip(offset as usize) 131 | { 132 | let ino = *ino_r; 133 | let inode = self.pfs.find_inode(ino)?; 134 | let kind = mode_to_fuse_type(&inode)?; 135 | 136 | // if the buffer is full, let's skip the extra lookups 137 | if reply.add(ino, (index + 1) as i64, kind, OsStr::from_bytes(name)) { 138 | break; 139 | } 140 | } 141 | 142 | Ok(()) 143 | } 144 | 145 | fn _readlink(&mut self, ino: u64) -> Result { 146 | let inode = self.pfs.find_inode(ino)?; 147 | let error = WireFormatError::from_errno(Errno::EINVAL); 148 | let kind = mode_to_fuse_type(&inode)?; 149 | match kind { 150 | FileType::Symlink => inode 151 | .additional 152 | .and_then(|add| add.symlink_target.map(OsString::from_vec)) 153 | .ok_or(error), 154 | _ => Err(error), 155 | } 156 | } 157 | 158 | fn _listxattr(&mut self, ino: u64) -> Result> { 159 | let inode = self.pfs.find_inode(ino)?; 160 | let xattr_list = inode 161 | .additional 162 | .map(|add| { 163 | add.xattrs 164 | .iter() 165 | .flat_map(|x| { 166 | CString::new(x.key.as_slice()) 167 | .expect("xattr is a valid string") 168 | .as_bytes_with_nul() 169 | .to_vec() 170 | }) 171 | .collect::>() 172 | }) 173 | .unwrap_or_else(Vec::::new); 174 | 175 | Ok(xattr_list) 176 | } 177 | 178 | fn _getxattr(&mut self, ino: u64, name: &OsStr) -> Result> { 179 | let inode = self.pfs.find_inode(ino)?; 180 
| inode 181 | .additional 182 | .and_then(|add| { 183 | add.xattrs 184 | .into_iter() 185 | .find(|elem| elem.key == name.as_bytes()) 186 | }) 187 | .map(|xattr| xattr.val) 188 | .ok_or_else(|| WireFormatError::from_errno(Errno::ENODATA)) 189 | } 190 | } 191 | 192 | impl Drop for Fuse { 193 | fn drop(&mut self) { 194 | // This code should be in the destroy function inside the Filesystem implementation 195 | // Unfortunately, destroy is not getting called: https://github.com/zargony/fuse-rs/issues/151 196 | // This is fixed in fuser, which we're not using right now: https://github.com/cberner/fuser/issues/153 197 | if let Some(sender) = &self.sender { 198 | sender.send(()).unwrap(); 199 | } 200 | } 201 | } 202 | 203 | impl Filesystem for Fuse { 204 | fn init( 205 | &mut self, 206 | _req: &Request<'_>, 207 | _config: &mut KernelConfig, 208 | ) -> std::result::Result<(), c_int> { 209 | if let Some(init_notify) = self.init_notify.take() { 210 | match init_notify { 211 | PipeDescriptor::UnnamedPipe(mut pipe_writer) => { 212 | if let Err(e) = pipe_writer.write_all(b"s") { 213 | warn!("unsuccessful send! {e}"); 214 | } 215 | } 216 | PipeDescriptor::NamedPipe(named_pipe) => { 217 | // since opening a pipe for writing blocks until the reading end is opened 218 | // create a new thread so the filesystem can be used even if nobody is reading from the pipe 219 | thread::spawn(move || { 220 | let md = fs::metadata(&named_pipe); 221 | match md { 222 | Err(e) => { 223 | warn!("cannot get file metadata, {e}"); 224 | return; 225 | } 226 | Ok(md) => { 227 | if !md.file_type().is_fifo() { 228 | warn!( 229 | "the provided file {} is not a fifo!", 230 | named_pipe.display() 231 | ); 232 | return; 233 | } 234 | } 235 | } 236 | let file = OpenOptions::new().write(true).open(&named_pipe); 237 | match file { 238 | Ok(mut file) => { 239 | if let Err(e) = file.write_all(b"s") { 240 | warn!("cannot write to pipe {}, {e}", named_pipe.display()); 241 | } 242 | } 243 | Err(e) => { 244 | warn!("cannot open pipe {}, {e}", named_pipe.display()); 245 | } 246 | } 247 | }); 248 | } 249 | } 250 | } 251 | Ok(()) 252 | } 253 | 254 | fn destroy(&mut self) {} 255 | fn forget(&mut self, _req: &Request<'_>, _ino: u64, _nlookup: u64) {} 256 | 257 | // puzzlefs is readonly, so we can ignore a bunch of requests 258 | fn setattr( 259 | &mut self, 260 | _req: &Request<'_>, 261 | _ino: u64, 262 | _mode: Option, 263 | _uid: Option, 264 | _gid: Option, 265 | _size: Option, 266 | _atime: Option, 267 | _mtime: Option, 268 | _ctime: Option, 269 | _fh: Option, 270 | _crtime: Option, 271 | _chgtime: Option, 272 | _bkuptime: Option, 273 | _flags: Option, 274 | reply: fuser::ReplyAttr, 275 | ) { 276 | debug!("setattr not supported!"); 277 | reply.error(Errno::EROFS as i32) 278 | } 279 | 280 | fn mknod( 281 | &mut self, 282 | _req: &Request<'_>, 283 | _parent: u64, 284 | _name: &OsStr, 285 | _mode: u32, 286 | _umask: u32, 287 | _rdev: u32, 288 | reply: ReplyEntry, 289 | ) { 290 | debug!("mknod not supported!"); 291 | reply.error(Errno::EROFS as i32) 292 | } 293 | 294 | fn mkdir( 295 | &mut self, 296 | _req: &Request<'_>, 297 | _parent: u64, 298 | _name: &OsStr, 299 | _mode: u32, 300 | _umask: u32, 301 | reply: ReplyEntry, 302 | ) { 303 | debug!("mkdir not supported!"); 304 | reply.error(Errno::EROFS as i32) 305 | } 306 | 307 | fn unlink( 308 | &mut self, 309 | _req: &Request<'_>, 310 | _parent: u64, 311 | _name: &OsStr, 312 | reply: fuser::ReplyEmpty, 313 | ) { 314 | debug!("unlink not supported!"); 315 | reply.error(Errno::EROFS as i32) 316 | } 
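    // Note added for clarity (not in the original source): _listxattr above
    // returns the keys in the wire format the kernel expects from listxattr(2),
    // i.e. each key NUL-terminated and concatenated, so an inode carrying
    // "user.a" and "user.b" yields b"user.a\0user.b\0". The listxattr/getxattr
    // handlers further down follow the usual two-step protocol: when the caller
    // passes size == 0 they only report the needed length, otherwise they copy
    // the data out (or return ERANGE if the caller's buffer is too small).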
317 | 318 | fn rmdir(&mut self, _req: &Request<'_>, _parent: u64, _name: &OsStr, reply: fuser::ReplyEmpty) { 319 | debug!("rmdir not supported!"); 320 | reply.error(Errno::EROFS as i32) 321 | } 322 | 323 | fn symlink( 324 | &mut self, 325 | _req: &Request<'_>, 326 | _parent: u64, 327 | _name: &OsStr, 328 | _link: &Path, 329 | reply: ReplyEntry, 330 | ) { 331 | debug!("symlink not supported!"); 332 | reply.error(Errno::EROFS as i32) 333 | } 334 | 335 | fn rename( 336 | &mut self, 337 | _req: &Request<'_>, 338 | _parent: u64, 339 | _name: &OsStr, 340 | _newparent: u64, 341 | _newname: &OsStr, 342 | _flags: u32, 343 | reply: fuser::ReplyEmpty, 344 | ) { 345 | debug!("rename not supported!"); 346 | reply.error(Errno::EROFS as i32) 347 | } 348 | 349 | fn link( 350 | &mut self, 351 | _req: &Request<'_>, 352 | _ino: u64, 353 | _newparent: u64, 354 | _newname: &OsStr, 355 | reply: ReplyEntry, 356 | ) { 357 | debug!("link not supported!"); 358 | reply.error(Errno::EROFS as i32) 359 | } 360 | 361 | fn write( 362 | &mut self, 363 | _req: &Request<'_>, 364 | _ino: u64, 365 | _fh: u64, 366 | _offset: i64, 367 | _data: &[u8], 368 | _write_flags: u32, 369 | _flags: i32, 370 | _lock_owner: Option, 371 | reply: fuser::ReplyWrite, 372 | ) { 373 | debug!("write not supported!"); 374 | reply.error(Errno::EROFS as i32) 375 | } 376 | 377 | fn flush( 378 | &mut self, 379 | _req: &Request<'_>, 380 | _ino: u64, 381 | _fh: u64, 382 | _lock_owner: u64, 383 | reply: fuser::ReplyEmpty, 384 | ) { 385 | debug!("flush not supported!"); 386 | reply.error(Errno::ENOSYS as i32) 387 | } 388 | 389 | fn fsync( 390 | &mut self, 391 | _req: &Request<'_>, 392 | _ino: u64, 393 | _fh: u64, 394 | _datasync: bool, 395 | reply: fuser::ReplyEmpty, 396 | ) { 397 | debug!("fsync not supported!"); 398 | reply.error(Errno::EROFS as i32) 399 | } 400 | 401 | fn fsyncdir( 402 | &mut self, 403 | _req: &Request<'_>, 404 | _ino: u64, 405 | _fh: u64, 406 | _datasync: bool, 407 | reply: fuser::ReplyEmpty, 408 | ) { 409 | debug!("fsyncdir not supported!"); 410 | reply.error(Errno::EROFS as i32) 411 | } 412 | 413 | fn setxattr( 414 | &mut self, 415 | _req: &Request<'_>, 416 | _ino: u64, 417 | _name: &OsStr, 418 | _value: &[u8], 419 | _flags: i32, 420 | _position: u32, 421 | reply: fuser::ReplyEmpty, 422 | ) { 423 | reply.error(Errno::EROFS as i32) 424 | } 425 | 426 | fn removexattr( 427 | &mut self, 428 | _req: &Request<'_>, 429 | _ino: u64, 430 | _name: &OsStr, 431 | reply: fuser::ReplyEmpty, 432 | ) { 433 | debug!("removexattr not supported!"); 434 | reply.error(Errno::EROFS as i32) 435 | } 436 | 437 | fn create( 438 | &mut self, 439 | _req: &Request<'_>, 440 | _parent: u64, 441 | _name: &OsStr, 442 | _mode: u32, 443 | _umask: u32, 444 | _flags: i32, 445 | reply: fuser::ReplyCreate, 446 | ) { 447 | debug!("create not supported!"); 448 | reply.error(Errno::EROFS as i32) 449 | } 450 | 451 | fn getlk( 452 | &mut self, 453 | _req: &Request<'_>, 454 | _ino: u64, 455 | _fh: u64, 456 | _lock_owner: u64, 457 | _start: u64, 458 | _end: u64, 459 | _typ: i32, 460 | _pid: u32, 461 | reply: fuser::ReplyLock, 462 | ) { 463 | debug!("getlk not supported!"); 464 | reply.error(Errno::EROFS as i32) 465 | } 466 | 467 | fn setlk( 468 | &mut self, 469 | _req: &Request<'_>, 470 | _ino: u64, 471 | _fh: u64, 472 | _lock_owner: u64, 473 | _start: u64, 474 | _end: u64, 475 | _typ: i32, 476 | _pid: u32, 477 | _sleep: bool, 478 | reply: fuser::ReplyEmpty, 479 | ) { 480 | debug!("setlk not supported!"); 481 | reply.error(Errno::EROFS as i32) 482 | } 483 | 484 | fn lookup(&mut 
self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEntry) { 485 | match self._lookup(parent, name) { 486 | Ok(attr) => { 487 | // http://libfuse.github.io/doxygen/structfuse__entry__param.html 488 | let ttl = Duration::new(u64::MAX, 0); 489 | let generation = 0; 490 | reply.entry(&ttl, &attr, generation) 491 | } 492 | Err(e) => { 493 | debug!("cannot lookup parent: {parent}, name {name:?} {e}!"); 494 | reply.error(e.to_errno()); 495 | } 496 | } 497 | } 498 | 499 | fn getattr(&mut self, _req: &Request<'_>, ino: u64, reply: fuser::ReplyAttr) { 500 | match self._getattr(ino) { 501 | Ok(attr) => { 502 | // http://libfuse.github.io/doxygen/structfuse__entry__param.html 503 | let ttl = Duration::new(u64::MAX, 0); 504 | reply.attr(&ttl, &attr) 505 | } 506 | Err(e) => { 507 | debug!("cannot getattr for ino {ino} {e}!"); 508 | reply.error(e.to_errno()) 509 | } 510 | } 511 | } 512 | 513 | fn readlink(&mut self, _req: &Request<'_>, ino: u64, reply: ReplyData) { 514 | match self._readlink(ino) { 515 | Ok(symlink) => reply.data(symlink.as_bytes()), 516 | Err(e) => { 517 | debug!("cannot readlink ino: {ino} {e}!"); 518 | reply.error(e.to_errno()) 519 | } 520 | } 521 | } 522 | 523 | fn open(&mut self, _req: &Request<'_>, _ino: u64, flags: i32, reply: ReplyOpen) { 524 | self._open(flags, reply) 525 | } 526 | 527 | fn read( 528 | &mut self, 529 | _req: &Request<'_>, 530 | ino: u64, 531 | _fh: u64, 532 | offset: i64, 533 | size: u32, 534 | _flags: i32, 535 | _lock_owner: Option, 536 | reply: ReplyData, 537 | ) { 538 | // TODO: why i64 from the fuse API here? 539 | let uoffset: u64 = offset.try_into().unwrap(); 540 | match self._read(ino, uoffset, size) { 541 | Ok(data) => reply.data(data.as_slice()), 542 | Err(e) => { 543 | debug!("cannot read ino {ino}, offset: {uoffset} {e}!"); 544 | reply.error(e.to_errno()) 545 | } 546 | } 547 | } 548 | 549 | fn release( 550 | &mut self, 551 | _req: &Request<'_>, 552 | _ino: u64, 553 | _fh: u64, 554 | _flags: i32, 555 | _lock_owner: Option, 556 | _flush: bool, 557 | reply: fuser::ReplyEmpty, 558 | ) { 559 | // TODO: purge from our cache here? dcache should save us too... 560 | reply.ok() 561 | } 562 | 563 | fn opendir(&mut self, _req: &Request<'_>, _ino: u64, flags: i32, reply: ReplyOpen) { 564 | self._open(flags, reply) 565 | } 566 | 567 | fn readdir( 568 | &mut self, 569 | _req: &Request<'_>, 570 | ino: u64, 571 | _fh: u64, 572 | offset: i64, 573 | mut reply: fuser::ReplyDirectory, 574 | ) { 575 | match self._readdir(ino, offset, &mut reply) { 576 | Ok(_) => reply.ok(), 577 | Err(e) => { 578 | debug!("cannot readdir ino: {ino}, offset {offset} {e}!"); 579 | reply.error(e.to_errno()) 580 | } 581 | } 582 | } 583 | 584 | fn releasedir( 585 | &mut self, 586 | _req: &Request<'_>, 587 | _ino: u64, 588 | _fh: u64, 589 | _flags: i32, 590 | reply: fuser::ReplyEmpty, 591 | ) { 592 | // TODO: again maybe purge from cache? 
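        // Note added for clarity (not in the original source): lookup() and
        // getattr() above hand the kernel a TTL of Duration::new(u64::MAX, 0),
        // i.e. entries and attributes are cached effectively forever. That is
        // safe here because a PuzzleFS mount is read-only and its metadata
        // never changes for the lifetime of the mount, so there is nothing to
        // invalidate.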
593 | reply.ok() 594 | } 595 | 596 | fn statfs(&mut self, _req: &Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { 597 | reply.statfs( 598 | 0, // blocks 599 | 0, // bfree 600 | 0, // bavail 601 | 0, // files 602 | 0, // ffree 603 | 0, // bsize 604 | 256, // namelen 605 | 0, // frsize 606 | ) 607 | } 608 | 609 | fn getxattr( 610 | &mut self, 611 | _req: &Request<'_>, 612 | ino: u64, 613 | name: &OsStr, 614 | size: u32, 615 | reply: fuser::ReplyXattr, 616 | ) { 617 | match self._getxattr(ino, name) { 618 | Ok(xattr) => { 619 | let xattr_len: u32 = xattr 620 | .len() 621 | .try_into() 622 | .expect("xattrs should not exceed u32"); 623 | if size == 0 { 624 | reply.size(xattr_len) 625 | } else if xattr_len <= size { 626 | reply.data(&xattr) 627 | } else { 628 | reply.error(Errno::ERANGE as i32) 629 | } 630 | } 631 | Err(e) => { 632 | debug!("cannot getxattr, ino: {ino}, name {name:?} {e}!"); 633 | reply.error(e.to_errno()) 634 | } 635 | } 636 | } 637 | 638 | fn listxattr(&mut self, _req: &Request<'_>, ino: u64, size: u32, reply: fuser::ReplyXattr) { 639 | match self._listxattr(ino) { 640 | Ok(xattr) => { 641 | let xattr_len: u32 = xattr 642 | .len() 643 | .try_into() 644 | .expect("xattrs should not exceed u32"); 645 | if size == 0 { 646 | reply.size(xattr_len) 647 | } else if xattr_len <= size { 648 | reply.data(&xattr) 649 | } else { 650 | reply.error(Errno::ERANGE as i32) 651 | } 652 | } 653 | Err(e) => { 654 | debug!("cannot listxattr, ino {ino}, size {size} {e}!"); 655 | reply.error(e.to_errno()) 656 | } 657 | } 658 | } 659 | 660 | fn access(&mut self, _req: &Request<'_>, _ino: u64, _mask: i32, reply: fuser::ReplyEmpty) { 661 | reply.ok() 662 | } 663 | 664 | fn bmap( 665 | &mut self, 666 | _req: &Request<'_>, 667 | _ino: u64, 668 | _blocksize: u32, 669 | _idx: u64, 670 | reply: fuser::ReplyBmap, 671 | ) { 672 | reply.error(Errno::ENOLCK as i32) 673 | } 674 | } 675 | 676 | #[cfg(test)] 677 | mod tests { 678 | use std::fs; 679 | use std::io; 680 | use std::path::Path; 681 | 682 | use sha2::{Digest, Sha256}; 683 | use tempfile::tempdir; 684 | 685 | use crate::builder::build_test_fs; 686 | use crate::oci::Image; 687 | 688 | #[test] 689 | fn test_fuse() { 690 | let dir = tempdir().unwrap(); 691 | let image = Image::new(dir.path()).unwrap(); 692 | build_test_fs(Path::new("src/builder/test/test-1"), &image, "test").unwrap(); 693 | let mountpoint = tempdir().unwrap(); 694 | let _bg = crate::reader::spawn_mount::<&str>( 695 | image, 696 | "test", 697 | Path::new(mountpoint.path()), 698 | &[], 699 | None, 700 | None, 701 | None, 702 | ) 703 | .unwrap(); 704 | let ents = fs::read_dir(mountpoint.path()) 705 | .unwrap() 706 | .collect::>>() 707 | .unwrap(); 708 | assert_eq!(ents.len(), 1); 709 | assert_eq!( 710 | ents[0].path().strip_prefix(mountpoint.path()).unwrap(), 711 | Path::new("SekienAkashita.jpg") 712 | ); 713 | 714 | let mut hasher = Sha256::new(); 715 | let mut f = fs::File::open(ents[0].path()).unwrap(); 716 | io::copy(&mut f, &mut hasher).unwrap(); 717 | let digest = hasher.finalize(); 718 | const FILE_DIGEST: &str = 719 | "d9e749d9367fc908876749d6502eb212fee88c9a94892fb07da5ef3ba8bc39ed"; 720 | assert_eq!(hex::encode(digest), FILE_DIGEST); 721 | } 722 | } 723 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/reader/puzzlefs.rs: -------------------------------------------------------------------------------- 1 | use nix::errno::Errno; 2 | use std::backtrace::Backtrace; 3 | use std::cmp::min; 4 | use std::io; 5 | use 
std::os::unix::ffi::OsStrExt; 6 | use std::path::{Component, Path}; 7 | use std::sync::Arc; 8 | 9 | use crate::format::{ 10 | DirEnt, Ino, Inode, InodeMode, Result, RootfsReader, VerityData, WireFormatError, 11 | }; 12 | use crate::oci::Image; 13 | 14 | pub const PUZZLEFS_IMAGE_MANIFEST_VERSION: u64 = 3; 15 | 16 | pub(crate) fn file_read( 17 | oci: &Image, 18 | inode: &Inode, 19 | offset: usize, 20 | data: &mut [u8], 21 | verity_data: &Option, 22 | ) -> Result { 23 | let chunks = match &inode.mode { 24 | InodeMode::File { chunks } => chunks, 25 | _ => return Err(WireFormatError::from_errno(Errno::ENOTDIR)), 26 | }; 27 | 28 | // TODO: fix all this casting... 29 | let end = offset + data.len(); 30 | 31 | let mut file_offset = 0; 32 | let mut buf_offset = 0; 33 | for chunk in chunks { 34 | // have we read enough? 35 | if file_offset > end { 36 | break; 37 | } 38 | 39 | // should we skip this chunk? 40 | if file_offset + (chunk.len as usize) < offset { 41 | file_offset += chunk.len as usize; 42 | continue; 43 | } 44 | 45 | let addl_offset = offset.saturating_sub(file_offset); 46 | 47 | // ok, need to read this chunk; how much? 48 | let left_in_buf = data.len() - buf_offset; 49 | let to_read = min(left_in_buf, chunk.len as usize - addl_offset); 50 | 51 | let start = buf_offset; 52 | let finish = start + to_read; 53 | file_offset += addl_offset; 54 | 55 | // how many did we actually read? 56 | let n = oci.fill_from_chunk( 57 | chunk.blob, 58 | addl_offset as u64, 59 | &mut data[start..finish], 60 | verity_data, 61 | )?; 62 | file_offset += n; 63 | buf_offset += n; 64 | } 65 | 66 | // discard any extra if we hit EOF 67 | Ok(buf_offset) 68 | } 69 | 70 | pub struct PuzzleFS { 71 | pub oci: Arc, 72 | rootfs: RootfsReader, 73 | pub verity_data: Option, 74 | pub manifest_verity: Option>, 75 | } 76 | 77 | impl PuzzleFS { 78 | pub fn open(oci: Image, tag: &str, manifest_verity: Option<&[u8]>) -> Result { 79 | let rootfs = oci.open_rootfs_blob(tag, manifest_verity)?; 80 | 81 | if rootfs.get_manifest_version()? != PUZZLEFS_IMAGE_MANIFEST_VERSION { 82 | return Err(WireFormatError::InvalidImageVersion( 83 | format!( 84 | "got {}, expected {}", 85 | rootfs.get_manifest_version()?, 86 | PUZZLEFS_IMAGE_MANIFEST_VERSION 87 | ), 88 | Backtrace::capture(), 89 | )); 90 | } 91 | 92 | let verity_data = if manifest_verity.is_some() { 93 | Some(rootfs.get_verity_data()?) 94 | } else { 95 | None 96 | }; 97 | 98 | Ok(PuzzleFS { 99 | oci: Arc::new(oci), 100 | rootfs, 101 | verity_data, 102 | manifest_verity: manifest_verity.map(|e| e.to_vec()), 103 | }) 104 | } 105 | 106 | pub fn find_inode(&self, ino: u64) -> Result { 107 | self.rootfs.find_inode(ino) 108 | } 109 | 110 | // lookup performs a path-based lookup in this puzzlefs 111 | pub fn lookup(&self, p: &Path) -> Result> { 112 | let components = p.components().collect::>>(); 113 | if !matches!(components[0], Component::RootDir) { 114 | return Err(WireFormatError::from_errno(Errno::EINVAL)); 115 | } 116 | 117 | let mut cur = self.find_inode(1)?; 118 | 119 | // TODO: better path resolution with .. and such? 
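        // Worked example (comment added for clarity, not in the original source):
        // for p = "/SekienAkashita.jpg" the components are
        // [RootDir, Normal("SekienAkashita.jpg")]; RootDir satisfies the check
        // above, and the loop below looks the name up in the root directory's
        // dir_list and follows its inode number (2 in the test image built from
        // src/builder/test/test-1). A path to a missing entry, e.g. "/notexist",
        // returns Ok(None) rather than an error, which is what test_path_lookup
        // below relies on.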
120 | for comp in components.into_iter().skip(1) { 121 | match comp { 122 | Component::Normal(p) => { 123 | if let InodeMode::Dir { dir_list } = cur.mode { 124 | if let Some(DirEnt { ino, name: _ }) = dir_list 125 | .entries 126 | .into_iter() 127 | .find(|dir_entry| dir_entry.name == p.as_bytes()) 128 | { 129 | cur = self.find_inode(ino)?; 130 | continue; 131 | } 132 | } 133 | return Ok(None); 134 | } 135 | _ => return Err(WireFormatError::from_errno(Errno::EINVAL)), 136 | } 137 | } 138 | 139 | Ok(Some(cur)) 140 | } 141 | 142 | pub fn max_inode(&self) -> Result { 143 | self.rootfs.max_inode() 144 | } 145 | } 146 | 147 | pub struct FileReader<'a> { 148 | oci: &'a Image, 149 | inode: &'a Inode, 150 | offset: usize, 151 | len: usize, 152 | } 153 | 154 | impl<'a> FileReader<'a> { 155 | pub fn new(oci: &'a Image, inode: &'a Inode) -> Result> { 156 | let len = inode.file_len()? as usize; 157 | Ok(FileReader { 158 | oci, 159 | inode, 160 | offset: 0, 161 | len, 162 | }) 163 | } 164 | } 165 | 166 | impl io::Read for FileReader<'_> { 167 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 168 | let to_read = min(self.len - self.offset, buf.len()); 169 | if to_read == 0 { 170 | return Ok(0); 171 | } 172 | 173 | let read = file_read( 174 | self.oci, 175 | self.inode, 176 | self.offset, 177 | &mut buf[0..to_read], 178 | &None, 179 | ) 180 | .map_err(|e| io::Error::from_raw_os_error(e.to_errno()))?; 181 | self.offset += read; 182 | Ok(read) 183 | } 184 | } 185 | 186 | #[cfg(test)] 187 | mod tests { 188 | use sha2::{Digest, Sha256}; 189 | use tempfile::tempdir; 190 | 191 | use crate::builder::build_test_fs; 192 | 193 | use super::*; 194 | 195 | #[test] 196 | fn test_file_reader() { 197 | // make ourselves a test image 198 | let oci_dir = tempdir().unwrap(); 199 | let image = Image::new(oci_dir.path()).unwrap(); 200 | build_test_fs(Path::new("src/builder/test/test-1"), &image, "test").unwrap(); 201 | let pfs = PuzzleFS::open(image, "test", None).unwrap(); 202 | 203 | let inode = pfs.find_inode(2).unwrap(); 204 | let mut reader = FileReader::new(&pfs.oci, &inode).unwrap(); 205 | let mut hasher = Sha256::new(); 206 | 207 | assert_eq!(io::copy(&mut reader, &mut hasher).unwrap(), 109466); 208 | let digest = hasher.finalize(); 209 | assert_eq!( 210 | hex::encode(digest), 211 | "d9e749d9367fc908876749d6502eb212fee88c9a94892fb07da5ef3ba8bc39ed" 212 | ); 213 | assert_eq!(pfs.max_inode().unwrap(), 2); 214 | } 215 | 216 | #[test] 217 | fn test_path_lookup() { 218 | let oci_dir = tempdir().unwrap(); 219 | let image = Image::new(oci_dir.path()).unwrap(); 220 | build_test_fs(Path::new("src/builder/test/test-1"), &image, "test").unwrap(); 221 | let pfs = PuzzleFS::open(image, "test", None).unwrap(); 222 | 223 | assert_eq!(pfs.lookup(Path::new("/")).unwrap().unwrap().ino, 1); 224 | assert_eq!( 225 | pfs.lookup(Path::new("/SekienAkashita.jpg")) 226 | .unwrap() 227 | .unwrap() 228 | .ino, 229 | 2 230 | ); 231 | assert!(pfs.lookup(Path::new("/notexist")).unwrap().is_none()); 232 | pfs.lookup(Path::new("./invalid-path")).unwrap_err(); 233 | pfs.lookup(Path::new("invalid-path")).unwrap_err(); 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /puzzlefs-lib/src/reader/walk.rs: -------------------------------------------------------------------------------- 1 | use std::collections::VecDeque; 2 | use std::path::PathBuf; 3 | 4 | use crate::format::{Inode, InodeMode, Result}; 5 | use crate::oci::Image; 6 | use std::ffi::OsStr; 7 | use std::os::unix::ffi::OsStrExt; 8 | use 
std::sync::Arc; 9 | 10 | use super::puzzlefs::{FileReader, PuzzleFS}; 11 | 12 | /// A in iterator over a PuzzleFS filesystem. This iterates breadth first, since file content is 13 | /// stored that way in a puzzlefs image so it'll be faster reading actual content if clients want 14 | /// to do that. 15 | pub struct WalkPuzzleFS<'a> { 16 | pfs: &'a mut PuzzleFS, 17 | q: VecDeque, 18 | } 19 | 20 | impl<'a> WalkPuzzleFS<'a> { 21 | pub fn walk(pfs: &'a mut PuzzleFS) -> Result> { 22 | let mut q = VecDeque::new(); 23 | 24 | let inode = pfs.find_inode(1)?; // root inode number 25 | let de = DirEntry { 26 | oci: Arc::clone(&pfs.oci), 27 | path: PathBuf::from("/"), 28 | inode, 29 | }; 30 | q.push_back(de); 31 | Ok(WalkPuzzleFS { pfs, q }) 32 | } 33 | 34 | fn add_dir_entries(&mut self, dir: &DirEntry) -> Result<()> { 35 | if let InodeMode::Dir { ref dir_list } = dir.inode.mode { 36 | for entry in &dir_list.entries { 37 | let inode = self.pfs.find_inode(entry.ino)?; 38 | let path = dir.path.join(OsStr::from_bytes(&entry.name)); 39 | self.q.push_back(DirEntry { 40 | oci: Arc::clone(&self.pfs.oci), 41 | path, 42 | inode, 43 | }) 44 | } 45 | }; 46 | 47 | Ok(()) 48 | } 49 | } 50 | 51 | impl Iterator for WalkPuzzleFS<'_> { 52 | type Item = Result; 53 | 54 | fn next(&mut self) -> Option { 55 | let de = self.q.pop_front()?; 56 | Some(self.add_dir_entries(&de).map(|_| de)) 57 | } 58 | } 59 | 60 | pub struct DirEntry { 61 | oci: Arc, 62 | pub path: PathBuf, 63 | pub inode: Inode, 64 | } 65 | 66 | impl DirEntry { 67 | /// Opens this DirEntry if it is a file. 68 | pub fn open(&self) -> Result> { 69 | FileReader::new(&self.oci, &self.inode) 70 | } 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | use tempfile::{tempdir, TempDir}; 76 | 77 | use std::fs; 78 | use std::path::Path; 79 | 80 | use crate::builder::build_test_fs; 81 | 82 | use super::*; 83 | 84 | #[test] 85 | fn test_walk() { 86 | // make ourselves a test image 87 | let oci_dir = tempdir().unwrap(); 88 | let image = Image::new(oci_dir.path()).unwrap(); 89 | build_test_fs(Path::new("src/builder/test/test-1"), &image, "test").unwrap(); 90 | let mut pfs = PuzzleFS::open(image, "test", None).unwrap(); 91 | 92 | let mut walker = WalkPuzzleFS::walk(&mut pfs).unwrap(); 93 | 94 | let root = walker.next().unwrap().unwrap(); 95 | assert_eq!(root.path.to_string_lossy(), "/"); 96 | assert_eq!(root.inode.ino, 1); 97 | assert_eq!(root.inode.dir_entries().unwrap().len(), 1); 98 | 99 | let jpg_file = walker.next().unwrap().unwrap(); 100 | assert_eq!(jpg_file.path.to_string_lossy(), "/SekienAkashita.jpg"); 101 | assert_eq!(jpg_file.inode.ino, 2); 102 | assert_eq!(jpg_file.inode.file_len().unwrap(), 109466); 103 | } 104 | 105 | #[test] 106 | fn test_xattrs() { 107 | // since walk provides us a nice API, we test some other basics of the builder here too. 108 | let dir = TempDir::new_in(".").unwrap(); 109 | let oci_dir = dir.path().join("oci"); 110 | let image = Image::new(&oci_dir).unwrap(); 111 | let rootfs = dir.path().join("rootfs"); 112 | 113 | let foo = rootfs.join("foo"); 114 | let bar = rootfs.join("bar"); 115 | 116 | // test directory, file types. we should probably also test "other" types, but on fifos and 117 | // symlinks on linux xattrs aren't allowed, so we just punt for now. maybe when 5.8 is more 118 | // prevalent, we can use mknod c 0 0? 
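        // Note added for clarity (not in the original source): the assertions
        // below expect /bar before /foo. That ordering comes from WalkPuzzleFS
        // being breadth-first over the root's dir_list, and it assumes the
        // builder stores directory entries sorted by name (so "bar" precedes
        // "foo"); if the builder ever changed that ordering, this test would
        // need to sort its results.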
119 | fs::create_dir_all(&foo).unwrap(); 120 | fs::write(&bar, b"bar").unwrap(); 121 | 122 | // set some xattrs 123 | for f in [&foo, &bar] { 124 | xattr::set(f, "user.meshuggah", b"rocks").unwrap(); 125 | } 126 | 127 | build_test_fs(&rootfs, &image, "test").unwrap(); 128 | 129 | let mut pfs = PuzzleFS::open(image, "test", None).unwrap(); 130 | 131 | let mut walker = WalkPuzzleFS::walk(&mut pfs).unwrap(); 132 | 133 | let root = walker.next().unwrap().unwrap(); 134 | assert_eq!(root.path.to_string_lossy(), "/"); 135 | assert_eq!(root.inode.ino, 1); 136 | assert_eq!(root.inode.dir_entries().unwrap().len(), 2); 137 | 138 | fn check_inode_xattrs(inode: Inode) { 139 | let additional = inode.additional.unwrap(); 140 | assert_eq!(additional.xattrs[0].key, b"user.meshuggah"); 141 | assert_eq!(additional.xattrs[0].val, b"rocks"); 142 | } 143 | 144 | let bar_i = walker.next().unwrap().unwrap(); 145 | assert_eq!(bar_i.path.to_string_lossy(), "/bar"); 146 | check_inode_xattrs(bar_i.inode); 147 | 148 | let foo_i = walker.next().unwrap().unwrap(); 149 | assert_eq!(foo_i.path.to_string_lossy(), "/foo"); 150 | check_inode_xattrs(foo_i.inode); 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel="nightly" 3 | --------------------------------------------------------------------------------
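// Illustrative end-to-end sketch (not a file from this repository): consuming
// a PuzzleFS image with the library APIs shown above. The crate name
// `puzzlefs_lib`, the ./oci layout, the "test" tag and the anyhow dependency
// are assumptions made for the example.

use std::path::Path;

use puzzlefs_lib::extractor::extract_rootfs;
use puzzlefs_lib::oci::Image;
use puzzlefs_lib::reader::{PuzzleFS, WalkPuzzleFS};

fn main() -> anyhow::Result<()> {
    // walk the image breadth-first, printing every entry
    let image = Image::open(Path::new("oci"))?;
    let mut pfs = PuzzleFS::open(image, "test", None)?;
    let mut walker = WalkPuzzleFS::walk(&mut pfs)?;
    walker.try_for_each(|de| -> anyhow::Result<()> {
        let de = de?;
        println!("{} (ino {})", de.path.display(), de.inode.ino);
        Ok(())
    })?;

    // or unpack the whole tag into a directory
    extract_rootfs("oci", "test", "extracted")?;
    Ok(())
}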