├── .gitignore ├── Makefile ├── README.md ├── ostree.h ├── ostreefs.cli └── otfs.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.ko 3 | *.mod.* 4 | *.mod 5 | Module.symvers 6 | .*.cmd 7 | modules.order 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: GPL-2.0-only 2 | 3 | # make -C $KERNEL_SOURCE modules M=$PWD && make -C $KERNEL_SOURCE modules_install M=$PWD 4 | 5 | obj-m += ostreefs.o 6 | 7 | KBUILD_CFLAGS += 8 | 9 | ostreefs-objs += otfs.o 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NOTICE: 2 | # Work on ostreefs is now stopped, in favour of https://github.com/giuseppe/composefs/. 3 | 4 | 5 | Ostreefs 6 | ======== 7 | 8 | ostreefs is a kernel filesystem that mounts ostree commits directly 9 | from an on-disk (bare format) ostree repository, with the goal of 10 | continuous verification of the ostree content. In other words, protect 11 | against any changes to the data in the filesystem as specified by 12 | the ostree commit id either online or offline. 13 | 14 | ## Short background on ostree repos 15 | 16 | A normal booted ostree system contains two parts, a repository and a 17 | checkout of a commit from the repository. 18 | 19 | The repository is a directory of "object files" which are indexed 20 | by the sha256 of their content and metadata. Some object files 21 | describe directories, and some regular files. There are also commit 22 | objects that contain a reference to the root directory. The regular 23 | file objects look just like the real files on disk (apart from the 24 | name), but the others are in a custom file format. 
25 | 26 | To be able to use such a repo ostree makes a checkout of the 27 | commit. This converts the directory metadata files to actual regular 28 | directories the kernel can understand, with the regular files in them 29 | being hard-links to the files in the repo. 30 | 31 | All the object files in the repository are verified by comparing the 32 | actual checksum with the expected one during download. In addition 33 | the commit object can be verified against a gpg signature. 34 | 35 | When the system is booted, the checkout for the commit we're booting 36 | is bind-mounted read-only as the root filesystem. 37 | 38 | ## Verification issues with ostree 39 | 40 | Once an ostree commit has been downloaded and checked out on disk, we 41 | never do any further verifications. This means that an attacker 42 | changing or adding files in the checkout (or accidental changes) will 43 | not be detected. 44 | 45 | It is possible to enable fs-verity for the files in the repository, 46 | which will tell the kernel to make the repo files immutable and all 47 | further reads from them will be verified against the stored 48 | checksums. However, this does not prohibit adding or replacing files. 49 | 50 | So, while the verification at deploy is nice, we would like to 51 | complete this with *continuous* verification, where every single 52 | I/O operation is verified against the ostree commit digest before 53 | being used. 54 | 55 | ## Introducing ostreefs 56 | 57 | Instead of using a checkout of the commit and hardlinks to the 58 | repository we use a custom kernel-based filesystem somewhat similar to 59 | overlayfs. It mounts the commit directly from the ostree repo, and 60 | ensures that directory metadata is immutable and verified before used. 61 | 62 | For example, suppose you have an ostree repo in /some/repo, and it 63 | contains a commit with id 64 | `f163640407d292e262442ab76af6eca4e2722d54c081c7be6e005114a57057dd`. 
Then 65 | you can mount this at `/mnt/ostree` by specifying the commit and the 66 | object directory from the repo as mount options, like this: 67 | 68 | ``` 69 | # mount ostreefs -t ostreefs -o commit=f163640407d292e262442ab76af6eca4e2722d54c081c7be6e005114a57057dd,objectdir=/some/repo/repo/objects /mnt/ostree 70 | # ls -l /mnt/ostree 71 | total 0 72 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 bin -> usr/bin 73 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 boot 74 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 dev 75 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 home -> var/home 76 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 lib -> usr/lib 77 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 lib64 -> usr/lib64 78 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 media -> run/media 79 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 mnt -> var/mnt 80 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 opt -> var/opt 81 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 ostree -> sysroot/ostree 82 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 proc 83 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 root -> var/roothome 84 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 run 85 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 sbin -> usr/sbin 86 | lrwxrwxrwx. 1 root root 0 Jan 1 1970 srv -> var/srv 87 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 sys 88 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 sysroot 89 | drwxrwxrwt. 1 root root 4096 Jan 1 1970 tmp 90 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 usr 91 | drwxr-xr-x. 1 root root 4096 Jan 1 1970 var 92 | ``` 93 | 94 | By default ostreefs assumes ostree repos are in `bare` mode, but if 95 | you pass `repomode=bare-user` it also works with `bare-user` repositories. 96 | 97 | 98 | ## Building ostreefs 99 | 100 | Before using ostreefs you must build the ostreefs kernel module against the kernel sources 101 | matching the version you are running. On Fedora and CentOS this is available in the 102 | kernel-devel package as /usr/src/kernels/$kernelversion, other distributions may 103 | have a different location. 
104 | 105 | To build and load the ostreefs module, run: 106 | 107 | ``` 108 | # make -C /usr/src/kernels/$(uname -r) modules M=$PWD 109 | # insmod ostreefs.ko 110 | ``` 111 | 112 | ## SELinux issues 113 | 114 | Ostreefs supports xattrs natively, and selinux normally uses xattrs to 115 | store selinux file contexts. However, this only works if the local 116 | policy allows a particular filesystem type to use xattrs for selinux, 117 | and the default is to not allow it. So, until the default selinux 118 | contexts support ostreefs, you need to manually install a local 119 | policy for this. 120 | 121 | To enable ostreefs selinux support, run: 122 | 123 | ``` 124 | # semodule -i ostreefs.cli 125 | ``` 126 | 127 | And, to later revert it, run: 128 | 129 | ``` 130 | # semodule -r ostreefs 131 | ``` 132 | 133 | ## Verification status 134 | 135 | Ostreefs currently verifies the sha256 checksum of the commit, 136 | dirmeta, dirtree and symbolic link objects before using them, and keeps 137 | the data in kernel memory after verification. This means that 138 | any such metadata is guaranteed to correctly match what was in the 139 | original commit at all times. 140 | 141 | However, content and metadata for regular files is a bit more complex. 142 | Verifying file contents requires a full sha256 checksum of the file 143 | contents, which is costly. And even if we do the checksum calculation 144 | once, there are no guarantees that the backing file isn't changed by 145 | some other process while ostreefs is using it. 146 | 147 | There are three modes for file verification today, controlled by 148 | the `fileverify` mount option: 149 | 150 | * `none`: (default) No verification of file content 151 | * `once`: The sha256 of the file metadata (uid, gid, mode, xattrs) and content 152 | is computed at inode lookup time and checked against the object id. 153 | * `full`: Same behaviour as `once`, but we also verify that the backing file object 154 | has fs-verity enabled. 
This guarantees it is immutable, so the initial check is 155 | valid over time. 156 | 157 | In the future we would like to have a mode that can rely on the actual fs-verify 158 | checksum, so that we can avoid having to run a sha256 over the file contents. 159 | -------------------------------------------------------------------------------- /ostree.h: -------------------------------------------------------------------------------- 1 | #define ALIGN_TO(_offset, _align_to) ((_offset + _align_to - 1) & ~(size_t)(_align_to - 1)) 2 | #define STRUCT_MEMBER_P(struct_p, struct_offset) ((void *) ((u8*) (struct_p) + (size_t) (struct_offset))) 3 | #define STRUCT_MEMBER(member_type, struct_p, struct_offset) (*(member_type*) STRUCT_MEMBER_P ((struct_p), (struct_offset))) 4 | 5 | #define OSTREE_SHA256_DIGEST_LEN 32 6 | #define OSTREE_SHA256_STRING_LEN 64 7 | 8 | struct OtXAttrData { 9 | const char *name; 10 | size_t size; 11 | char *value; 12 | }; 13 | 14 | typedef struct { 15 | const u8 *base; 16 | size_t size; 17 | } OtRef; 18 | 19 | __pure static inline u64 ot_ref_read_unaligned_le(const u8 *bytes, u32 size) 20 | { 21 | if (size >= 4) { 22 | if (size == 8) 23 | return get_unaligned_le64(bytes); 24 | else 25 | return (u64)get_unaligned_le32(bytes); 26 | } else { 27 | if (size == 2) 28 | return (u64)get_unaligned_le16(bytes); 29 | else 30 | return (u64)bytes[0]; 31 | } 32 | } 33 | 34 | static inline void ot_ref_write_unaligned_le(u8 *bytes, u32 size, u64 value) 35 | { 36 | if (size >= 4) { 37 | if (size == 8) 38 | put_unaligned_le64(value, bytes); 39 | else 40 | put_unaligned_le32((u32)value, bytes); 41 | } else { 42 | if (size == 2) 43 | put_unaligned_le16((u16)value, bytes); 44 | else 45 | bytes[0] = (u8)value; 46 | } 47 | } 48 | 49 | __attribute_const__ static inline u32 ot_ref_get_offset_size(size_t size) 50 | { 51 | if (size > U16_MAX) { 52 | if (size > U32_MAX) 53 | return 8; 54 | else 55 | return 4; 56 | } else { 57 | if (size > U8_MAX) 58 | return 2; 59 | else 60 | 
return 1; 61 | } 62 | } 63 | 64 | static inline size_t ot_variant_total_size(size_t body_size, size_t num_offsets) 65 | { 66 | if (body_size + 1 * num_offsets <= U8_MAX) 67 | return body_size + 1 * num_offsets; 68 | 69 | if (body_size + 2 * num_offsets <= U16_MAX) 70 | return body_size + 2 * num_offsets; 71 | 72 | if (body_size + 4 * num_offsets <= U32_MAX) 73 | return body_size + 4 * num_offsets; 74 | 75 | return body_size + 8 * num_offsets; 76 | } 77 | 78 | __pure static inline u64 79 | ot_ref_read_frame_offset(OtRef ref, u32 offset_size, u32 index) 80 | { 81 | size_t offset_from_end = offset_size * (index + 1); 82 | return ot_ref_read_unaligned_le(ref.base + ref.size - offset_from_end, offset_size); 83 | } 84 | 85 | static inline size_t ot_arrayof_nonfixed_get_length(OtRef v) 86 | { 87 | if (v.size == 0) { 88 | return 0; 89 | } else { 90 | u32 offset_size = ot_ref_get_offset_size(v.size); 91 | size_t last_end = ot_ref_read_frame_offset(v, offset_size, 0); 92 | size_t offsets_array_size; 93 | if (last_end > v.size) 94 | return 0; 95 | offsets_array_size = v.size - last_end; 96 | if (offsets_array_size % offset_size != 0) 97 | return 0; 98 | return offsets_array_size / offset_size; 99 | } 100 | } 101 | 102 | static inline bool ot_arrayof_nonfixed_get_at(OtRef v, size_t index, size_t *start_out, size_t *end_out) 103 | { 104 | u32 offset_size = ot_ref_get_offset_size(v.size); 105 | size_t last_end = ot_ref_read_frame_offset(v, offset_size, 0); 106 | size_t len = (v.size - last_end) / offset_size; 107 | size_t start = (index > 0) ? 
ALIGN_TO(ot_ref_read_frame_offset(v, offset_size, len - index), 1) : 0; 108 | size_t end = ot_ref_read_frame_offset(v, offset_size, len - index - 1); 109 | 110 | if (start > end || end > last_end) 111 | return false; 112 | 113 | *start_out = start; 114 | *end_out = end; 115 | return true; 116 | } 117 | 118 | 119 | /************** OtChecksum *******************/ 120 | #define OT_CHECKSUM_TYPESTRING "ay" 121 | 122 | typedef OtRef OtChecksumRef; 123 | 124 | static inline bool ot_checksum_from_data(const u8 * data, size_t size, bool allow_empty, OtChecksumRef *out) 125 | { 126 | if (size != OSTREE_SHA256_DIGEST_LEN && 127 | (size != 0 || !allow_empty)) 128 | return false; 129 | 130 | *out = (OtChecksumRef) { data, size }; 131 | return true; 132 | } 133 | 134 | static inline const u8 *ot_checksum_peek(OtChecksumRef v) 135 | { 136 | return (const u8 *)v.base; 137 | } 138 | 139 | static inline void sha256_digest_to_string(const u8 *csum, char *buf) 140 | { 141 | static const char hexchars[] = "0123456789abcdef"; 142 | u32 i, j; 143 | 144 | for (i = 0, j = 0; i < OSTREE_SHA256_DIGEST_LEN; i++, j += 2) { 145 | u8 byte = csum[i]; 146 | buf[j] = hexchars[byte >> 4]; 147 | buf[j+1] = hexchars[byte & 0xF]; 148 | } 149 | buf[j] = '\0'; 150 | } 151 | 152 | static inline void ot_checksum_to_string(OtChecksumRef v, char *buf) 153 | { 154 | sha256_digest_to_string(ot_checksum_peek(v), buf); 155 | } 156 | 157 | 158 | /************** OtCommit *******************/ 159 | #define OT_COMMIT_TYPESTRING "(a{sv}aya(say)sstayay)" 160 | 161 | typedef OtRef OtCommitRef; 162 | 163 | static inline bool ot_commit_from_data(const u8 *data, size_t size, OtCommitRef *out) 164 | { 165 | u32 offset_size = ot_ref_get_offset_size(size); 166 | 167 | if (size < 8 + offset_size * 6) 168 | return false; 169 | 170 | *out = (OtCommitRef) { data, size }; 171 | return true; 172 | } 173 | 174 | static inline bool ot_commit_get_root_contents(OtCommitRef v, OtChecksumRef *out) 175 | { 176 | u32 offset_size = 
ot_ref_get_offset_size(v.size); 177 | size_t last_end = ot_ref_read_frame_offset(v, offset_size, 4); 178 | size_t start = ALIGN_TO(last_end, 8) + 8; 179 | size_t end = ot_ref_read_frame_offset(v, offset_size, 5); 180 | 181 | if (start > end || end > v.size) 182 | return false; 183 | 184 | return ot_checksum_from_data(STRUCT_MEMBER_P(v.base, start), end - start, false, out); 185 | } 186 | 187 | static inline bool ot_commit_get_root_metadata(OtCommitRef v, OtChecksumRef *out) 188 | { 189 | u32 offset_size = ot_ref_get_offset_size(v.size); 190 | size_t start = ot_ref_read_frame_offset(v, offset_size, 5); 191 | size_t end = v.size - offset_size * 6; 192 | 193 | if (start > end || end > v.size) 194 | return false; 195 | 196 | return ot_checksum_from_data(STRUCT_MEMBER_P(v.base, start), end - start, false, out); 197 | } 198 | 199 | /************** OtXattr *******************/ 200 | #define OT_XATTR_TYPESTRING "(ayay)" 201 | 202 | typedef OtRef OtXattrRef; 203 | 204 | static inline bool ot_xattr_from_data(const u8 *data, size_t size, OtXattrRef *out) 205 | { 206 | u32 offset_size = ot_ref_get_offset_size(size); 207 | 208 | if (size < offset_size * 1) 209 | return false; 210 | 211 | *out = (OtXattrRef) { data, size }; 212 | return true; 213 | } 214 | 215 | static inline const u8 *ot_xattr_get_name(OtXattrRef v, size_t *len_out) 216 | { 217 | u32 offset_size = ot_ref_get_offset_size(v.size); 218 | size_t start = 0; 219 | size_t end = ot_ref_read_frame_offset(v, offset_size, 0); 220 | 221 | if (start > end || end > v.size) 222 | return NULL; 223 | 224 | *len_out = end - start; 225 | return STRUCT_MEMBER_P(v.base, start); 226 | } 227 | 228 | static inline const u8 *ot_xattr_get_value(OtXattrRef v, size_t *len) 229 | { 230 | u32 offset_size = ot_ref_get_offset_size(v.size); 231 | size_t last_end = ot_ref_read_frame_offset(v, offset_size, 0); 232 | size_t start = last_end; 233 | size_t end = v.size - offset_size * 1; 234 | 235 | if (start > end || end > v.size) 236 | return 
NULL; 237 | 238 | *len = end - start; 239 | return STRUCT_MEMBER_P(v.base, start); 240 | } 241 | 242 | static inline size_t ot_xattr_compute_size(const char *name, size_t data_size) 243 | { 244 | size_t name_len = strlen(name) + 1; 245 | return ot_variant_total_size(name_len + data_size, 1); 246 | } 247 | 248 | static inline size_t ot_xattr_serialize(u8 *buf, const char *name, const u8 *data, size_t data_size) 249 | { 250 | size_t name_len = strlen(name) + 1; 251 | size_t size = ot_xattr_compute_size(name, data_size); 252 | u32 offset_size = ot_ref_get_offset_size(size); 253 | 254 | memcpy(buf, name, name_len); 255 | memcpy(buf + name_len, data, data_size); 256 | ot_ref_write_unaligned_le(buf + name_len + data_size, offset_size, name_len); 257 | 258 | return size; 259 | } 260 | 261 | /************** OtArrayofXattr *******************/ 262 | #define OT_ARRAYOF_XATTR_TYPESTRING "a(ayay)" 263 | 264 | typedef OtRef OtArrayofXattrRef; 265 | 266 | static inline bool ot_arrayof_xattr_from_data(const u8 *data, size_t size, OtArrayofXattrRef *out) 267 | { 268 | *out = (OtArrayofXattrRef) { data, size }; 269 | return true; 270 | } 271 | 272 | static inline size_t ot_arrayof_xattr_get_length(OtArrayofXattrRef v) 273 | { 274 | return ot_arrayof_nonfixed_get_length(v); 275 | } 276 | 277 | static inline bool ot_arrayof_xattr_get_at(OtArrayofXattrRef v, size_t index, OtXattrRef *out) 278 | { 279 | size_t start, end; 280 | 281 | if (!ot_arrayof_nonfixed_get_at(v, index, &start, &end)) 282 | return false; 283 | 284 | return ot_xattr_from_data(((const u8 *)v.base) + start, end - start, out); 285 | } 286 | 287 | /************** OtDirMeta *******************/ 288 | #define OT_DIR_META_TYPESTRING "(uuua(ayay))" 289 | 290 | /* Note: This is also used for the header of file content. 
*/ 291 | 292 | typedef OtRef OtDirMetaRef; 293 | 294 | static inline bool ot_dir_meta_from_data(const u8 *data, size_t size, OtDirMetaRef *out) 295 | { 296 | if (size < 12) 297 | return false; 298 | 299 | *out = (OtDirMetaRef) { data, size }; 300 | return true; 301 | } 302 | 303 | static inline u32 ot_dir_meta_get_uid(OtDirMetaRef v) 304 | { 305 | return be32_to_cpu(STRUCT_MEMBER(u32, v.base, 0)); 306 | } 307 | 308 | static inline u32 ot_dir_meta_get_gid(OtDirMetaRef v) 309 | { 310 | return be32_to_cpu(STRUCT_MEMBER(u32, v.base, 4)); 311 | } 312 | 313 | static inline u32 ot_dir_meta_get_mode(OtDirMetaRef v) 314 | { 315 | return be32_to_cpu(STRUCT_MEMBER(u32, v.base, 8)); 316 | } 317 | 318 | static inline bool ot_dir_meta_get_xattrs(OtDirMetaRef v, OtArrayofXattrRef *out) 319 | { 320 | size_t start = 12; 321 | size_t end = v.size; 322 | 323 | if (start > end || end > v.size) 324 | return false; 325 | 326 | return ot_arrayof_xattr_from_data(STRUCT_MEMBER_P(v.base, start), end - start, out); 327 | } 328 | 329 | static inline int ot_dir_meta_serialize(u32 uid, u32 gid, u32 mode, struct OtXAttrData *xattrs, size_t num_xattr, OtDirMetaRef *out) 330 | { 331 | size_t size, i; 332 | u8 *data; 333 | size_t array_body_size; 334 | size_t array_size; 335 | 336 | array_body_size = 0; 337 | array_size = 0; 338 | if (num_xattr > 0) { 339 | for (i = 0; i < num_xattr; i++) 340 | array_body_size += ot_xattr_compute_size(xattrs[i].name, xattrs[i].size); 341 | array_size = ot_variant_total_size(array_body_size, num_xattr); 342 | } 343 | 344 | size = 12 + array_size; 345 | 346 | data = kvmalloc(size, GFP_KERNEL); 347 | if (data == NULL) { 348 | return -ENOMEM; 349 | } 350 | 351 | STRUCT_MEMBER(u32, data, 0) = cpu_to_be32(uid); 352 | STRUCT_MEMBER(u32, data, 4) = cpu_to_be32(gid); 353 | STRUCT_MEMBER(u32, data, 8) = cpu_to_be32(mode); 354 | 355 | if (num_xattr > 0) { 356 | u32 array_offset_size = ot_ref_get_offset_size(array_size); 357 | u8 *xattrs_data_start = data + 12; 358 | u8 
*xattrs_data; 359 | 360 | for (xattrs_data = xattrs_data_start, i = 0; i < num_xattr; i++) { 361 | xattrs_data += ot_xattr_serialize(xattrs_data, xattrs[i].name, xattrs[i].value, xattrs[i].size); 362 | ot_ref_write_unaligned_le(xattrs_data_start + array_body_size + i * array_offset_size, array_offset_size, xattrs_data - xattrs_data_start); 363 | } 364 | } 365 | 366 | out->base = data; 367 | out->size = size; 368 | return 0; 369 | } 370 | 371 | /************** OtFileHeader *******************/ 372 | #define OT_FILE_HEADER_TYPESTRING "(uuuusa(ayay))" 373 | 374 | static inline int ot_file_header_checksum(OtDirMetaRef dir_meta, const char *target_link, struct sha256_state *sha256_ctx) 375 | { 376 | size_t body_size, variant_size, target_link_len, size; 377 | u32 offset_size; 378 | OtArrayofXattrRef xattrs; 379 | u8 data[8 + 16]; 380 | 381 | if (!ot_dir_meta_get_xattrs(dir_meta, &xattrs)) { 382 | return -EIO; 383 | } 384 | 385 | target_link_len = strlen(target_link) + 1; 386 | body_size = 387 | 16 /* uid, gid, mode, pad */ + 388 | target_link_len + 389 | xattrs.size; 390 | variant_size = ot_variant_total_size(body_size, 1); 391 | offset_size = ot_ref_get_offset_size(variant_size); 392 | size = 8 /* lenprefix header */ + variant_size; 393 | 394 | /* length-prefix */ 395 | STRUCT_MEMBER(u32, data, 0) = cpu_to_be32(variant_size); 396 | STRUCT_MEMBER(u32, data, 4) = 0; /* padding */ 397 | 398 | /* variant */ 399 | STRUCT_MEMBER(u32, data + 8, 0) = cpu_to_be32(ot_dir_meta_get_uid (dir_meta)); 400 | STRUCT_MEMBER(u32, data + 8, 4) = cpu_to_be32(ot_dir_meta_get_gid (dir_meta)); 401 | STRUCT_MEMBER(u32, data + 8, 8) = cpu_to_be32(ot_dir_meta_get_mode (dir_meta)); 402 | STRUCT_MEMBER(u32, data + 8, 12) = 0; 403 | 404 | sha256_update(sha256_ctx, data, 8 + 16); 405 | 406 | sha256_update(sha256_ctx, target_link, target_link_len); 407 | sha256_update(sha256_ctx, xattrs.base, xattrs.size); 408 | 409 | ot_ref_write_unaligned_le(data, offset_size, 16 + target_link_len); 410 | 
sha256_update(sha256_ctx, data, offset_size); 411 | 412 | return 0; 413 | 414 | } 415 | 416 | /************** OtTreeFile *******************/ 417 | #define OT_TREE_FILE_TYPESTRING "(say)" 418 | 419 | typedef OtRef OtTreeFileRef; 420 | 421 | static inline bool ot_tree_file_from_data(const u8 * data, size_t size, OtTreeFileRef *out) 422 | { 423 | u32 offset_size = ot_ref_get_offset_size(size); 424 | 425 | if (size < offset_size * 1) 426 | return false; 427 | 428 | *out = (OtTreeFileRef) { data, size }; 429 | return true; 430 | } 431 | 432 | static inline const char *ot_tree_file_get_name(OtTreeFileRef v, size_t *len_out) 433 | { 434 | u32 offset_size = ot_ref_get_offset_size(v.size); 435 | const char *base = (const char *)v.base; 436 | size_t start = 0; 437 | size_t end = ot_ref_read_frame_offset(v, offset_size, 0); 438 | 439 | if (start > end || end > v.size || base[end-1] != 0) 440 | return NULL; 441 | 442 | if (len_out) 443 | *len_out = end - start - 1; /* Not including terminating zero */ 444 | 445 | return &STRUCT_MEMBER(const char, v.base, start); 446 | } 447 | 448 | static inline bool ot_tree_file_get_checksum(OtTreeFileRef v, OtChecksumRef *out) 449 | { 450 | u32 offset_size = ot_ref_get_offset_size(v.size); 451 | size_t last_end = ot_ref_read_frame_offset(v, offset_size, 0); 452 | size_t start = last_end; 453 | size_t end = v.size - offset_size * 1; 454 | 455 | if (start > end || end > v.size) 456 | return false; 457 | 458 | return ot_checksum_from_data(STRUCT_MEMBER_P(v.base, start), end - start, false, out); 459 | } 460 | 461 | /************** OtTreeDir *******************/ 462 | #define OT_TREE_DIR_TYPESTRING "(sayay)" 463 | 464 | typedef OtRef OtTreeDirRef; 465 | 466 | static inline bool ot_tree_dir_from_data(const u8 * data, size_t size, OtTreeDirRef *out) 467 | { 468 | u32 offset_size = ot_ref_get_offset_size(size); 469 | 470 | if (size < offset_size * 2) 471 | return false; 472 | 473 | *out = (OtTreeDirRef) { data, size }; 474 | return true; 475 | } 
476 | 477 | static inline const char *ot_tree_dir_get_name(OtTreeDirRef v, size_t *len_out) 478 | { 479 | u32 offset_size = ot_ref_get_offset_size(v.size); 480 | const char *base = (const char *)v.base; 481 | size_t start = 0; 482 | size_t end = ot_ref_read_frame_offset(v, offset_size, 0); 483 | 484 | if (start > end || end > v.size || base[end-1] != 0) 485 | return NULL; 486 | 487 | if (len_out) 488 | *len_out = end - start - 1; /* Not including terminating zero */ 489 | 490 | return &STRUCT_MEMBER(const char, v.base, start); 491 | } 492 | 493 | static inline bool ot_tree_dir_get_tree_checksum(OtTreeDirRef v, OtChecksumRef *out) 494 | { 495 | u32 offset_size = ot_ref_get_offset_size (v.size); 496 | size_t start = ot_ref_read_frame_offset(v, offset_size, 0); 497 | size_t end = ot_ref_read_frame_offset(v, offset_size, 1); 498 | 499 | if (start > end || end > v.size) 500 | return false; 501 | 502 | return ot_checksum_from_data(STRUCT_MEMBER_P(v.base, start), end - start, false, out); 503 | } 504 | 505 | static inline bool ot_tree_dir_get_meta_checksum(OtTreeDirRef v, OtChecksumRef *out) 506 | { 507 | u32 offset_size = ot_ref_get_offset_size(v.size); 508 | size_t start = ot_ref_read_frame_offset(v, offset_size, 1); 509 | size_t end = v.size - offset_size * 2; 510 | 511 | if (start > end || end > v.size) 512 | return false; 513 | 514 | return ot_checksum_from_data(STRUCT_MEMBER_P(v.base, start), end - start, false, out); 515 | } 516 | 517 | /************** OtArrayofTreeFile *******************/ 518 | #define OT_ARRAYOF_TREE_FILE_TYPESTRING "a(say)" 519 | 520 | typedef OtRef OtArrayofTreeFileRef; 521 | 522 | static inline bool ot_arrayof_tree_file_from_data(const u8 * data, size_t size, OtArrayofTreeFileRef *out) 523 | { 524 | *out = (OtArrayofTreeFileRef) { data, size }; 525 | return true; 526 | } 527 | 528 | static inline size_t ot_arrayof_tree_file_get_length(OtArrayofTreeFileRef v) 529 | { 530 | return ot_arrayof_nonfixed_get_length(v); 531 | } 532 | 533 | static 
inline bool ot_arrayof_tree_file_get_at(OtArrayofTreeFileRef v, size_t index, OtTreeFileRef *out) 534 | { 535 | size_t start, end; 536 | 537 | if (!ot_arrayof_nonfixed_get_at(v, index, &start, &end)) 538 | return false; 539 | 540 | return ot_tree_file_from_data(((const u8 *)v.base) + start, end - start, out); 541 | } 542 | 543 | /************** OtArrayofTreeDir *******************/ 544 | #define OT_ARRAYOF_TREE_DIR_TYPESTRING "a(say)" 545 | 546 | typedef OtRef OtArrayofTreeDirRef; 547 | 548 | static inline bool ot_arrayof_tree_dir_from_data(const u8 * data, size_t size, OtArrayofTreeDirRef *out) 549 | { 550 | *out = (OtArrayofTreeDirRef) { data, size }; 551 | return true; 552 | } 553 | 554 | static inline size_t ot_arrayof_tree_dir_get_length(OtArrayofTreeDirRef v) 555 | { 556 | return ot_arrayof_nonfixed_get_length(v); 557 | } 558 | 559 | static inline bool ot_arrayof_tree_dir_get_at(OtArrayofTreeDirRef v, size_t index, OtTreeDirRef *out) 560 | { 561 | size_t start, end; 562 | 563 | if (!ot_arrayof_nonfixed_get_at(v, index, &start, &end)) 564 | return false; 565 | 566 | return ot_tree_dir_from_data(((const u8 *)v.base) + start, end - start, out); 567 | } 568 | 569 | /************** OtTreeMeta *******************/ 570 | #define OT_TREE_META_TYPESTRING "(a(say)a(sayay))" 571 | 572 | typedef OtRef OtTreeMetaRef; 573 | 574 | static inline bool ot_tree_meta_from_data(const u8 * data, size_t size, OtTreeMetaRef *out) 575 | { 576 | u32 offset_size = ot_ref_get_offset_size(size); 577 | 578 | if (size < offset_size * 1) 579 | return false; 580 | 581 | *out = (OtTreeMetaRef) { data, size }; 582 | return true; 583 | } 584 | 585 | static inline bool ot_tree_meta_get_files(OtTreeMetaRef v, OtArrayofTreeFileRef *out) 586 | { 587 | u32 offset_size = ot_ref_get_offset_size(v.size); 588 | size_t start = 0; 589 | size_t end = ot_ref_read_frame_offset(v, offset_size, 0); 590 | 591 | if (start > end || end > v.size) 592 | return false; 593 | 594 | return 
ot_arrayof_tree_file_from_data(STRUCT_MEMBER_P(v.base, start), end - start, out); 595 | } 596 | 597 | static inline bool ot_tree_meta_get_dirs(OtTreeMetaRef v, OtArrayofTreeDirRef *out) 598 | { 599 | u32 offset_size = ot_ref_get_offset_size(v.size); 600 | size_t start = ot_ref_read_frame_offset(v, offset_size, 0); 601 | size_t end = v.size - offset_size * 1; 602 | 603 | if (start > end || end > v.size) 604 | return false; 605 | 606 | return ot_arrayof_tree_dir_from_data(STRUCT_MEMBER_P(v.base, start), end - start, out); 607 | } 608 | -------------------------------------------------------------------------------- /ostreefs.cli: -------------------------------------------------------------------------------- 1 | (fsuse xattr ostreefs (system_u object_r fs_t ((s0) (s0)))) 2 | -------------------------------------------------------------------------------- /otfs.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ostreefs 3 | * 4 | * Copyright (C) 2000 Linus Torvalds. 5 | * 2000 Transmeta Corp. 6 | * Copyright (C) 2021 Giuseppe Scrivano 7 | * Copyright (C) 2022 Alexander Larsson 8 | * 9 | * This file is released under the GPL. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "ostree.h" 28 | 29 | #define OTFS_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY) 30 | 31 | MODULE_LICENSE("GPL"); 32 | MODULE_AUTHOR("Alexander Larsson "); 33 | 34 | #define OTFS_MAGIC 0x055245638 35 | 36 | enum ot_repo_mode { 37 | ot_repo_mode_bare, 38 | ot_repo_mode_bare_user, 39 | }; 40 | 41 | static const struct constant_table ostreefs_param_repomode[] = { 42 | { "bare", ot_repo_mode_bare }, 43 | { "bare-user", ot_repo_mode_bare_user }, 44 | {} 45 | }; 46 | 47 | enum ot_file_verify { 48 | ot_file_verify_none, 49 | ot_file_verify_once, 50 | ot_file_verify_full, 51 | }; 52 | 53 | static const struct constant_table ostreefs_param_fileverify[] = { 54 | { "none", ot_file_verify_none }, 55 | { "once", ot_file_verify_once }, 56 | { "full", ot_file_verify_full }, 57 | {} 58 | }; 59 | 60 | struct otfs_info { 61 | char *object_dir_path; 62 | struct path object_dir; 63 | char *commit_id; 64 | enum ot_repo_mode repo_mode; 65 | enum ot_file_verify file_verify; 66 | 67 | atomic64_t inode_counter; 68 | }; 69 | 70 | struct otfs_inode { 71 | struct inode vfs_inode; /* must be first for clear in otfs_alloc_inode to work */ 72 | OtDirMetaRef dirmeta; 73 | union { 74 | struct { 75 | struct path path; 76 | } nondir; 77 | struct { 78 | OtTreeMetaRef dirtree; 79 | u64 inode_base; 80 | } dir; 81 | }; 82 | }; 83 | 84 | static inline struct otfs_inode *OTFS_I(struct inode *inode) 85 | { 86 | return container_of(inode, struct otfs_inode, vfs_inode); 87 | } 88 | 89 | static const struct super_operations otfs_ops; 90 | static const struct file_operations otfs_file_operations; 91 | static const struct file_operations otfs_dir_operations; 92 | static const struct inode_operations otfs_dir_inode_operations; 93 | static const struct inode_operations otfs_file_inode_operations; 
94 | static const struct address_space_operations otfs_aops = { 95 | .direct_IO = noop_direct_IO, 96 | }; 97 | 98 | static void ot_ref_kvfree(OtRef ref) 99 | { 100 | if (ref.base) { 101 | kvfree(ref.base); 102 | ref.base = NULL; 103 | } 104 | } 105 | 106 | static int otfs_show_options(struct seq_file *m, struct dentry *root) 107 | { 108 | struct otfs_info *fsi = root->d_sb->s_fs_info; 109 | 110 | seq_printf(m, ",object_dir=%s", fsi->object_dir_path); 111 | seq_printf(m, ",commit=%s", fsi->commit_id); 112 | if (fsi->repo_mode == ot_repo_mode_bare_user) 113 | seq_printf(m, ",repomode=bare-user"); 114 | if (fsi->file_verify == ot_file_verify_full) 115 | seq_printf(m, ",fileverify=full"); 116 | else if (fsi->file_verify == ot_file_verify_once) 117 | seq_printf(m, ",fileverify=once"); 118 | else if (fsi->file_verify == ot_file_verify_none) 119 | seq_printf(m, ",fileverify=none"); 120 | return 0; 121 | } 122 | 123 | static int otfs_statfs(struct dentry *dentry, struct kstatfs *buf) 124 | { 125 | struct otfs_info *fsi = dentry->d_sb->s_fs_info; 126 | int err; 127 | 128 | err = vfs_statfs(&(fsi->object_dir), buf); 129 | if (!err) { 130 | buf->f_namelen = NAME_MAX; 131 | buf->f_type = OTFS_MAGIC; 132 | } 133 | 134 | return err; 135 | } 136 | 137 | static struct kmem_cache *otfs_inode_cachep; 138 | 139 | static struct inode *otfs_alloc_inode(struct super_block *sb) 140 | { 141 | #if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0)) 142 | struct otfs_inode *oti = kmem_cache_alloc(otfs_inode_cachep, GFP_KERNEL); 143 | #else 144 | struct otfs_inode *oti = alloc_inode_sb(sb, otfs_inode_cachep, GFP_KERNEL); 145 | #endif 146 | 147 | if (!oti) 148 | return NULL; 149 | 150 | memset((u8*)oti + sizeof(struct inode), 0, sizeof(struct otfs_inode) - sizeof(struct inode)); 151 | 152 | return &oti->vfs_inode; 153 | } 154 | 155 | static void otfs_destroy_inode(struct inode *inode) 156 | { 157 | struct otfs_inode *oti = OTFS_I(inode); 158 | 159 | if (S_ISDIR(inode->i_mode)) { 160 | 
ot_ref_kvfree(oti->dir.dirtree); 161 | } else { 162 | if (oti->nondir.path.dentry) 163 | path_put(&oti->nondir.path); 164 | } 165 | 166 | if (S_ISLNK(inode->i_mode) && inode->i_link) 167 | kfree(inode->i_link); 168 | 169 | ot_ref_kvfree(oti->dirmeta); 170 | } 171 | 172 | static void otfs_free_inode(struct inode *inode) 173 | { 174 | struct otfs_inode *oti = OTFS_I(inode); 175 | 176 | kmem_cache_free(otfs_inode_cachep, oti); 177 | } 178 | 179 | static void otfs_put_super(struct super_block *sb) 180 | { 181 | struct otfs_info *fsi = sb->s_fs_info; 182 | 183 | if (fsi->object_dir_path) 184 | kfree(fsi->object_dir_path); 185 | if (fsi->object_dir.dentry) 186 | path_put(&fsi->object_dir); 187 | if (fsi->commit_id) 188 | kfree(fsi->commit_id); 189 | 190 | kfree(fsi); 191 | sb->s_fs_info = NULL; 192 | } 193 | 194 | static const struct super_operations otfs_ops = { 195 | .put_super = otfs_put_super, 196 | .statfs = otfs_statfs, 197 | .drop_inode = generic_delete_inode, 198 | .show_options = otfs_show_options, 199 | .alloc_inode = otfs_alloc_inode, 200 | .destroy_inode = otfs_destroy_inode, 201 | .free_inode = otfs_free_inode, 202 | }; 203 | 204 | enum otfs_param { 205 | Opt_object_dir, 206 | Opt_commit, 207 | Opt_repomode, 208 | Opt_fileverify, 209 | }; 210 | 211 | const struct fs_parameter_spec otfs_parameters[] = { 212 | fsparam_string("objectdir", Opt_object_dir), 213 | fsparam_string("commit", Opt_commit), 214 | fsparam_enum("repomode", Opt_repomode, ostreefs_param_repomode), 215 | fsparam_enum("fileverify", Opt_fileverify, ostreefs_param_fileverify), 216 | {} 217 | }; 218 | 219 | static int otfs_parse_param(struct fs_context *fc, struct fs_parameter *param) 220 | { 221 | struct fs_parse_result result; 222 | struct otfs_info *fsi = fc->s_fs_info; 223 | int opt; 224 | 225 | opt = fs_parse(fc, otfs_parameters, param, &result); 226 | if (opt < 0) 227 | return opt; 228 | 229 | switch (opt) { 230 | case Opt_object_dir: 231 | kfree(fsi->object_dir_path); 232 | /* Take 
ownership. */ 233 | fsi->object_dir_path = param->string; 234 | param->string = NULL; 235 | break; 236 | case Opt_commit: 237 | kfree(fsi->commit_id); 238 | /* Take ownership. */ 239 | fsi->commit_id = param->string; 240 | param->string = NULL; 241 | break; 242 | case Opt_repomode: 243 | fsi->repo_mode = result.uint_32; 244 | break; 245 | case Opt_fileverify: 246 | fsi->file_verify = result.uint_32; 247 | break; 248 | } 249 | 250 | return 0; 251 | } 252 | 253 | static struct file *otfs_open_object (struct path *object_dir, const char *object_id, const char *type, int flags) 254 | { 255 | char relpath[OSTREE_SHA256_STRING_LEN + 12]; /* Fits slash and longest extenssion (.dirtree) */ 256 | 257 | if (strlen(object_id) != OSTREE_SHA256_STRING_LEN) 258 | return ERR_PTR(-ENOENT); 259 | 260 | relpath[0] = object_id[0]; 261 | relpath[1] = object_id[1]; 262 | relpath[2] = '/'; 263 | relpath[3] = 0; 264 | strcat (relpath, object_id + 2); 265 | strcat (relpath, type); 266 | 267 | return file_open_root(object_dir, relpath, flags, 0); 268 | } 269 | 270 | static int otfs_read_object (struct path *object_dir, const char *object_id, const char *type, 271 | u8 **data_out) 272 | { 273 | struct file *f = NULL; 274 | void *buf = NULL; 275 | size_t file_size; 276 | int ret; 277 | int read_bytes; 278 | uint8_t digest[SHA256_DIGEST_SIZE]; 279 | char digest_string[OSTREE_SHA256_STRING_LEN + 1]; 280 | 281 | f = otfs_open_object(object_dir, object_id, type, O_RDONLY | OTFS_OPEN_FLAGS); 282 | if (IS_ERR(f)) 283 | return PTR_ERR(f); 284 | 285 | read_bytes = kernel_read_file(f, 0, &buf, INT_MAX, &file_size, READING_UNKNOWN); 286 | if (read_bytes < 0) { 287 | ret = read_bytes; 288 | goto fail; 289 | } 290 | 291 | sha256(buf, read_bytes, digest); 292 | sha256_digest_to_string (digest, digest_string); 293 | 294 | if (strcmp(digest_string, object_id) != 0) { 295 | printk(KERN_ERR "Invalid digest %s for ostree object %s of type %s\n", digest_string, object_id, type); 296 | ret = -EIO; 297 | goto 
fail; 298 | } 299 | 300 | fput(f); 301 | 302 | *data_out = buf; 303 | return read_bytes; 304 | 305 | fail: 306 | if (buf) 307 | vfree(buf); 308 | 309 | if (f) 310 | fput(f); 311 | return ret; 312 | } 313 | 314 | static int otfs_read_objectv (struct path *object_dir, OtChecksumRef checksum, const char *type, 315 | u8 **data_out) 316 | { 317 | char object_id[OSTREE_SHA256_STRING_LEN+1]; 318 | ot_checksum_to_string (checksum, object_id); 319 | 320 | return otfs_read_object (object_dir, object_id, type, data_out); 321 | } 322 | 323 | static int otfs_read_dirtree_object (struct path *object_dir, OtChecksumRef commit, 324 | OtTreeMetaRef *treemetav_out) 325 | { 326 | OtTreeMetaRef treemetav; 327 | int res; 328 | u8 *data; 329 | 330 | res = otfs_read_objectv (object_dir, commit, ".dirtree", &data); 331 | if (res < 0) 332 | return res; 333 | 334 | if (!ot_tree_meta_from_data (data, res, &treemetav)) { 335 | vfree(data); 336 | return -EIO; 337 | } 338 | 339 | *treemetav_out = treemetav; 340 | return 0; 341 | } 342 | 343 | static int otfs_read_dirmeta_object (struct path *object_dir, OtChecksumRef commit, 344 | OtDirMetaRef *dirmetav_out) 345 | { 346 | OtDirMetaRef dirmetav; 347 | int res; 348 | u8 *data; 349 | 350 | res = otfs_read_objectv (object_dir, commit, ".dirmeta", &data); 351 | if (res < 0) 352 | return res; 353 | 354 | if (!ot_dir_meta_from_data (data, res, &dirmetav)) { 355 | vfree(data); 356 | return -EIO; 357 | } 358 | 359 | *dirmetav_out = dirmetav; 360 | return 0; 361 | } 362 | 363 | static ssize_t listxattr(struct dentry *dentry, char **bufp) 364 | { 365 | ssize_t len; 366 | ssize_t ret; 367 | char *buf; 368 | struct inode *inode; 369 | 370 | inode = d_inode(dentry); 371 | len = 0; 372 | 373 | inode_lock_shared(inode); 374 | 375 | len = vfs_listxattr(dentry, NULL, 0); 376 | if (len <= 0) { 377 | ret = len; 378 | goto out; 379 | } 380 | 381 | if (len > XATTR_LIST_MAX) { 382 | ret = -E2BIG; 383 | goto out; 384 | } 385 | 386 | /* We're holding i_rwsem - use 
GFP_NOFS. */ 387 | buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); 388 | if (buf == NULL) { 389 | ret = -ENOMEM; 390 | goto out; 391 | } 392 | 393 | len = vfs_listxattr(dentry, buf, len); 394 | if (len <= 0) { 395 | kvfree(buf); 396 | ret = len; 397 | goto out; 398 | } 399 | 400 | *bufp = buf; 401 | ret = len; 402 | 403 | out: 404 | inode_unlock_shared(inode); 405 | return ret; 406 | } 407 | 408 | static int 409 | xattr_data_cmp(const struct OtXAttrData *a, const struct OtXAttrData *b) 410 | { 411 | return strcmp(a->name, b->name); 412 | } 413 | 414 | static void 415 | xattrs_data_free(struct OtXAttrData *data, size_t num_xattr, char *names) 416 | { 417 | size_t i; 418 | if (data) { 419 | for (i = 0; i < num_xattr; i++) 420 | kvfree(data[i].value); 421 | kvfree(data); 422 | } 423 | if (names) 424 | kvfree(names); 425 | } 426 | 427 | static ssize_t read_xattr(struct dentry *dentry, const char *name, u8 **data_out) { 428 | ssize_t size, value_size; 429 | u8 *value; 430 | 431 | size = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0); 432 | if (size < 0) 433 | return size; 434 | 435 | value_size = size; 436 | value = kvmalloc(value_size, GFP_KERNEL); 437 | 438 | size = vfs_getxattr(&init_user_ns, dentry, name, value, value_size); 439 | if (size < 0) { 440 | kvfree(value); 441 | return size; 442 | } 443 | 444 | *data_out = value; 445 | 446 | return size; 447 | } 448 | 449 | static ssize_t get_xattrs(struct dentry *dentry, char **names_out, struct OtXAttrData **data_out) 450 | { 451 | char *names = NULL; 452 | const char *name; 453 | ssize_t names_len; 454 | ssize_t remaining; 455 | ssize_t ret; 456 | size_t slen; 457 | ssize_t size; 458 | u8 *value = NULL; 459 | size_t num_xattrs, i = 0; 460 | struct OtXAttrData *data = NULL; 461 | 462 | names_len = listxattr(dentry, &names); 463 | if (names_len < 0) 464 | return (int)names_len; 465 | 466 | if (names_len == 0) { 467 | *names_out = NULL; 468 | *data_out = NULL; 469 | return 0; 470 | } 471 | 472 | num_xattrs = 0; 473 | for 
(name = names, remaining = names_len; remaining; name += slen) { 474 | slen = strnlen(name, remaining) + 1; 475 | /* underlying fs providing us with an broken xattr list? */ 476 | if (WARN_ON(slen > remaining)) { 477 | ret = -EIO; 478 | goto fail; 479 | } 480 | num_xattrs++; 481 | remaining -= slen; 482 | } 483 | 484 | data = kvmalloc_array(num_xattrs, sizeof(struct OtXAttrData), GFP_KERNEL); 485 | if (!data) { 486 | ret = -ENOMEM; 487 | goto fail; 488 | } 489 | 490 | for (name = names, remaining = names_len; remaining; name += slen) { 491 | slen = strnlen(name, remaining) + 1; 492 | remaining -= slen; 493 | 494 | size = read_xattr(dentry, name, &value); 495 | if (size < 0) { 496 | ret = size; 497 | goto fail; 498 | } 499 | 500 | data[i].name = name; 501 | data[i].value = value; 502 | data[i].size = size; 503 | i++; 504 | } 505 | 506 | sort(data, num_xattrs, sizeof(struct OtXAttrData), (cmp_func_t)xattr_data_cmp, NULL); 507 | 508 | *names_out = names; 509 | *data_out = data; 510 | return num_xattrs; 511 | 512 | fail: 513 | while (i > 0) { 514 | kvfree(data[i].value); 515 | i--; 516 | } 517 | 518 | if (data) 519 | kvfree(data); 520 | if (names) 521 | kvfree(names); 522 | return ret; 523 | } 524 | 525 | static struct inode *otfs_new_inode(struct super_block *sb, 526 | const struct inode *dir, 527 | ino_t ino_num, 528 | mode_t mode) 529 | { 530 | struct inode *inode; 531 | struct timespec64 ostree_time = {0, 0}; 532 | 533 | inode = new_inode(sb); 534 | if (inode == NULL) 535 | return ERR_PTR(-ENOMEM); 536 | 537 | inode->i_ino = ino_num; 538 | 539 | inode_init_owner(&init_user_ns, inode, dir, mode); 540 | inode->i_mapping->a_ops = &otfs_aops; 541 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); 542 | mapping_set_unevictable(inode->i_mapping); 543 | 544 | set_nlink(inode, 1); 545 | inode->i_mode = mode; 546 | inode->i_rdev = 0; 547 | inode->i_atime = ostree_time; 548 | inode->i_mtime = ostree_time; 549 | inode->i_ctime = ostree_time; 550 | 551 | return inode; 552 
| } 553 | 554 | static int reconstruct_filemeta(struct otfs_info *fsi, struct file *object_file, OtDirMetaRef *filemeta_out, loff_t *size_out, bool *is_verity) 555 | { 556 | struct kstat stat; 557 | char *xattr_names = NULL; 558 | OtDirMetaRef filemeta = { NULL, 0}; 559 | int err; 560 | 561 | err = vfs_getattr(&object_file->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); 562 | if (err < 0) 563 | return err; 564 | 565 | if (fsi->repo_mode == ot_repo_mode_bare) { 566 | struct OtXAttrData *xattr_data = NULL; 567 | ssize_t num_xattr = 0; 568 | 569 | if (!S_ISLNK(stat.mode) && !S_ISREG(stat.mode)) 570 | return -EIO; 571 | 572 | num_xattr = get_xattrs(object_file->f_path.dentry, &xattr_names, &xattr_data); 573 | if (num_xattr < 0) 574 | return num_xattr; 575 | 576 | err = ot_dir_meta_serialize(from_kuid(&init_user_ns, stat.uid), 577 | from_kgid(&init_user_ns, stat.gid), 578 | stat.mode, 579 | xattr_data, num_xattr, &filemeta); 580 | xattrs_data_free(xattr_data, num_xattr, xattr_names); 581 | if (err < 0) 582 | return err; 583 | } else { /* ot_repo_mode_bare_user */ 584 | u8 *data; 585 | ssize_t data_size; 586 | 587 | if (!S_ISREG(stat.mode)) 588 | return -EIO; 589 | 590 | data_size = read_xattr(object_file->f_path.dentry, "user.ostreemeta", &data); 591 | if (data_size < 0) 592 | return data_size; 593 | 594 | if (!ot_dir_meta_from_data(data, data_size, &filemeta)) { 595 | kvfree(data); 596 | return -EIO; 597 | } 598 | } 599 | 600 | *size_out = stat.size; 601 | *filemeta_out = filemeta; 602 | *is_verity = (stat.attributes & STATX_ATTR_VERITY) != 0; 603 | return 0; 604 | } 605 | 606 | #define CHECKSUM_BUF_SIZE (64*1024) 607 | static int file_content_checksum(struct path *path, struct sha256_state *sha256_ctx) { 608 | struct file *file = NULL; 609 | u8 *buffer = NULL; 610 | loff_t pos; 611 | int ret; 612 | 613 | buffer = vmalloc(CHECKSUM_BUF_SIZE); 614 | if (buffer == NULL) 615 | return -ENOMEM; 616 | 617 | file = file_open_root(path, "", O_RDONLY|OTFS_OPEN_FLAGS, 
0); 618 | if (IS_ERR(file)) { 619 | ret = PTR_ERR(file); 620 | goto fail; 621 | } 622 | 623 | pos = 0; 624 | while (true) { 625 | ssize_t bytes = kernel_read(file, buffer, CHECKSUM_BUF_SIZE, &pos); 626 | if (bytes < 0) { 627 | ret = bytes; 628 | goto fail; 629 | } 630 | 631 | if (bytes == 0) 632 | break; 633 | 634 | sha256_update(sha256_ctx, buffer, bytes); 635 | } 636 | 637 | vfree(buffer); 638 | fput(file); 639 | return 0; 640 | 641 | fail: 642 | if (file) 643 | fput(file); 644 | return ret; 645 | } 646 | 647 | static struct inode *otfs_make_file_inode(struct super_block *sb, 648 | const struct inode *dir, 649 | ino_t ino_num, 650 | OtChecksumRef file_csum) 651 | { 652 | struct otfs_info *fsi = sb->s_fs_info; 653 | struct otfs_inode *oti = NULL; 654 | struct file *object_file = NULL; 655 | int err; 656 | int ret; 657 | struct inode *inode; 658 | char *target_link = NULL; 659 | DEFINE_DELAYED_CALL(done); 660 | uint8_t digest[SHA256_DIGEST_SIZE]; 661 | char digest_string[OSTREE_SHA256_STRING_LEN + 1]; 662 | char object_id[OSTREE_SHA256_STRING_LEN+1]; 663 | OtDirMetaRef filemeta = { NULL, 0}; 664 | loff_t file_size; 665 | u32 mode; 666 | bool is_fsverity; 667 | bool do_verify; 668 | 669 | ot_checksum_to_string (file_csum, object_id); 670 | 671 | object_file = otfs_open_object (&fsi->object_dir, object_id, ".file", O_PATH|O_NOFOLLOW|OTFS_OPEN_FLAGS); 672 | if (IS_ERR(object_file)) 673 | return ERR_CAST(object_file); 674 | 675 | err = reconstruct_filemeta(fsi, object_file, &filemeta, &file_size, &is_fsverity); 676 | if (err < 0) { 677 | ret = err; 678 | goto fail; 679 | } 680 | 681 | mode = ot_dir_meta_get_mode(filemeta); 682 | 683 | /* We support only regular and symlink file objects */ 684 | if (!S_ISLNK(mode) && !S_ISREG(mode)) { 685 | ret = -EIO; 686 | goto fail; 687 | } 688 | 689 | if (S_ISLNK(mode)) { 690 | if (fsi->repo_mode == ot_repo_mode_bare) { 691 | const char *link; 692 | link = vfs_get_link(object_file->f_path.dentry, &done); 693 | if (IS_ERR(link)) { 
694 | ret = PTR_ERR(link); 695 | goto fail; 696 | } 697 | 698 | target_link = kstrdup(link, GFP_KERNEL); 699 | do_delayed_call(&done); 700 | } else { /* ot_repo_mode_bare_user */ 701 | void *buf = NULL; 702 | size_t file_size; 703 | int read_bytes; 704 | struct file *f; 705 | 706 | f = file_open_root(&(object_file->f_path), "", O_RDONLY|OTFS_OPEN_FLAGS, 0); 707 | if (IS_ERR(f)) { 708 | ret = PTR_ERR(f); 709 | goto fail; 710 | } 711 | 712 | read_bytes = kernel_read_file(f, 0, &buf, PATH_MAX, &file_size, READING_UNKNOWN); 713 | fput(f); 714 | if (read_bytes < 0) { 715 | ret = read_bytes; 716 | goto fail; 717 | } 718 | if (file_size != read_bytes) { 719 | vfree(buf); 720 | ret = -EIO; 721 | goto fail; 722 | } 723 | target_link = kstrdup(buf, GFP_KERNEL); 724 | vfree(buf); 725 | } 726 | } 727 | 728 | do_verify = true; 729 | 730 | if (S_ISREG(mode)) { 731 | switch (fsi->file_verify) { 732 | default: 733 | case ot_file_verify_full: 734 | /* Continous verification. Actually we only verify on first open, 735 | but then rely on fs-verity to make the inode immutable */ 736 | if (!is_fsverity) { 737 | printk(KERN_ERR "Full file verification requested, but file object %s doesn't have fs-verify enabled\n", object_id); 738 | ret = -EIO; 739 | goto fail; 740 | } 741 | do_verify = true; 742 | break; 743 | case ot_file_verify_once: 744 | /* Verify only on first open */ 745 | do_verify = true; 746 | break; 747 | case ot_file_verify_none: 748 | do_verify = false; 749 | break; 750 | } 751 | } 752 | 753 | if (do_verify) { 754 | struct sha256_state sha256_ctx; 755 | 756 | /* Compute file header for checksum validation */ 757 | sha256_init(&sha256_ctx); 758 | 759 | err = ot_file_header_checksum(filemeta, target_link ? 
target_link : "", &sha256_ctx); 760 | if (err < 0) { 761 | ret = err; 762 | goto fail; 763 | } 764 | 765 | if (S_ISREG(mode)) { 766 | err = file_content_checksum(&object_file->f_path, &sha256_ctx); 767 | if (err < 0) { 768 | ret = err; 769 | goto fail; 770 | } 771 | } 772 | 773 | sha256_final(&sha256_ctx, digest); 774 | 775 | sha256_digest_to_string (digest, digest_string); 776 | 777 | if (strcmp(digest_string, object_id) != 0) { 778 | printk(KERN_ERR "Corrupted file object: checksum expected='%s', actual='%s'\n", object_id, digest_string); 779 | ret = EIO; 780 | goto fail; 781 | } 782 | } 783 | 784 | inode = otfs_new_inode(sb, dir, ino_num, mode); 785 | if (IS_ERR(inode)) { 786 | ret = PTR_ERR(inode); 787 | goto fail; 788 | } 789 | 790 | oti = OTFS_I(inode); 791 | 792 | oti->nondir.path = object_file->f_path; 793 | path_get(&oti->nondir.path); 794 | oti->dirmeta = filemeta; 795 | 796 | inode->i_uid = make_kuid(current_user_ns(), ot_dir_meta_get_uid(filemeta)); 797 | inode->i_gid = make_kgid(current_user_ns(), ot_dir_meta_get_gid(filemeta)); 798 | 799 | if (S_ISLNK(mode)) { 800 | inode->i_link = target_link; /* transfer ownership */ 801 | inode->i_op = &simple_symlink_inode_operations; 802 | inode->i_fop = &otfs_file_operations; 803 | } else { 804 | inode->i_size = file_size; 805 | inode->i_op = &otfs_file_inode_operations; 806 | inode->i_fop = &otfs_file_operations; 807 | } 808 | 809 | return inode; 810 | 811 | fail: 812 | ot_ref_kvfree(filemeta); 813 | 814 | if (object_file) 815 | fput(object_file); 816 | if (target_link) 817 | kfree(target_link); 818 | 819 | return ERR_PTR(ret); 820 | } 821 | 822 | static struct inode *otfs_make_dir_inode(struct super_block *sb, 823 | const struct inode *dir, 824 | ino_t ino_num, 825 | OtChecksumRef dirtree_csum, 826 | OtChecksumRef dirmeta_csum) 827 | { 828 | struct otfs_info *fsi = sb->s_fs_info; 829 | struct inode *inode; 830 | struct otfs_inode *oti = NULL; 831 | int ret; 832 | OtTreeMetaRef dirtree = { NULL, 0 }; 833 | 
OtDirMetaRef dirmeta = { NULL, 0 }; 834 | u32 uid, gid, mode; 835 | int res; 836 | u64 n_inos; 837 | OtArrayofTreeFileRef files; 838 | OtArrayofTreeDirRef dirs; 839 | 840 | res = otfs_read_dirmeta_object (&fsi->object_dir, dirmeta_csum, &dirmeta); 841 | if (res < 0) { 842 | ret = res; 843 | goto fail; 844 | } 845 | 846 | uid = ot_dir_meta_get_uid(dirmeta); 847 | gid = ot_dir_meta_get_gid(dirmeta); 848 | mode = ot_dir_meta_get_mode(dirmeta); 849 | 850 | /* Ensure its actually a directory */ 851 | if ((mode & S_IFMT) != S_IFDIR) { 852 | ret = -EIO; 853 | goto fail; 854 | } 855 | 856 | /* TODO: Should we validate mode mode? */ 857 | 858 | res = otfs_read_dirtree_object (&fsi->object_dir, dirtree_csum, 859 | &dirtree); 860 | if (res < 0) { 861 | ret = res; 862 | goto fail; 863 | } 864 | 865 | inode = otfs_new_inode(sb, dir, ino_num, mode); 866 | if (IS_ERR(inode)) { 867 | ret = PTR_ERR(inode); 868 | goto fail; 869 | } 870 | 871 | inode->i_uid = make_kuid(current_user_ns(), uid); 872 | inode->i_gid = make_kgid(current_user_ns(), gid); 873 | 874 | inode->i_op = &otfs_dir_inode_operations; 875 | inode->i_fop = &otfs_dir_operations; 876 | inode->i_size = 4096; 877 | 878 | oti = OTFS_I(inode); 879 | 880 | /* Allocate inodes for all children */ 881 | n_inos = 0; 882 | if (ot_tree_meta_get_files (dirtree, &files)) 883 | n_inos += ot_arrayof_tree_file_get_length (files); 884 | if (ot_tree_meta_get_dirs (dirtree, &dirs)) 885 | n_inos += ot_arrayof_tree_dir_get_length (dirs); 886 | oti->dir.inode_base = atomic64_add_return (n_inos, &fsi->inode_counter) - n_inos; 887 | 888 | oti->dir.dirtree = dirtree; /* Transfer ownership */ 889 | oti->dirmeta = dirmeta; /* Transfer ownership */ 890 | 891 | return inode; 892 | fail: 893 | ot_ref_kvfree(dirtree); 894 | ot_ref_kvfree(dirmeta); 895 | 896 | return ERR_PTR(ret); 897 | } 898 | 899 | static int otfs_getxattr(const struct xattr_handler *handler, 900 | struct dentry *unused2, struct inode *inode, 901 | const char *name, void *value, 
size_t size) 902 | { 903 | struct otfs_inode *oti = OTFS_I(inode); 904 | size_t name_len = strlen(name) + 1; /* Include the terminating zero */ 905 | size_t i; 906 | OtArrayofXattrRef xattrs; 907 | size_t n_xattrs = 0; 908 | 909 | if (ot_dir_meta_get_xattrs(oti->dirmeta, &xattrs)) 910 | n_xattrs = ot_arrayof_xattr_get_length(xattrs); 911 | 912 | for (i = 0; i < n_xattrs; i++) { 913 | OtXattrRef xattr; 914 | if (ot_arrayof_xattr_get_at(xattrs, i, &xattr)) { 915 | size_t this_name_len, this_value_len; 916 | const u8 *this_name, *this_value; 917 | 918 | this_name = ot_xattr_get_name (xattr, &this_name_len); 919 | if (name == NULL || name_len != this_name_len || 920 | memcmp(this_name, name, name_len) != 0) 921 | continue; 922 | 923 | this_value = ot_xattr_get_value (xattr, &this_value_len); 924 | if (this_value == NULL) 925 | continue; 926 | 927 | if (size == 0) 928 | return this_value_len; 929 | if (size < this_value_len) 930 | return -E2BIG; 931 | memcpy(value, this_value, this_value_len); 932 | return this_value_len; 933 | } 934 | } 935 | 936 | return -ENODATA; 937 | } 938 | 939 | static const struct xattr_handler otfs_xattr_handler = { 940 | .prefix = "", /* catch all */ 941 | .get = otfs_getxattr, 942 | }; 943 | 944 | static const struct xattr_handler *otfs_xattr_handlers[] = { 945 | &otfs_xattr_handler, 946 | NULL, 947 | }; 948 | 949 | static int otfs_dir_release(struct inode *inode, struct file *file) 950 | { 951 | return 0; 952 | } 953 | 954 | static int otfs_dir_open(struct inode *inode, struct file *file) 955 | { 956 | return 0; 957 | } 958 | 959 | struct dentry *otfs_lookup(struct inode *dir, struct dentry *dentry, 960 | unsigned int flags) 961 | { 962 | struct otfs_inode *dir_oti; 963 | struct otfs_info *fsi; 964 | OtArrayofTreeFileRef files; 965 | OtArrayofTreeDirRef dirs; 966 | size_t i, n_files, n_dirs; 967 | struct inode *inode; 968 | 969 | fsi = dir->i_sb->s_fs_info; 970 | dir_oti = OTFS_I(dir); 971 | 972 | if (!ot_tree_meta_get_files 
(dir_oti->dir.dirtree, &files)) 973 | return ERR_PTR(-EIO); 974 | n_files = ot_arrayof_tree_file_get_length (files); 975 | 976 | if (!ot_tree_meta_get_dirs (dir_oti->dir.dirtree, &dirs)) 977 | return ERR_PTR(-EIO); 978 | n_dirs = ot_arrayof_tree_dir_get_length (dirs); 979 | 980 | for (i = 0; i < n_files; i++) { 981 | OtTreeFileRef treefile; 982 | size_t name_len; 983 | const char *name; 984 | 985 | if (!ot_arrayof_tree_file_get_at (files, i, &treefile)) 986 | continue; 987 | 988 | name = ot_tree_file_get_name (treefile, &name_len); 989 | if (name == NULL) 990 | continue; 991 | 992 | if (dentry->d_name.len == name_len && 993 | memcmp(dentry->d_name.name,name, name_len) == 0) { 994 | OtChecksumRef file_csum; 995 | if (!ot_tree_file_get_checksum (treefile, &file_csum)) 996 | return ERR_PTR(-EIO); 997 | 998 | inode = otfs_make_file_inode(dir->i_sb, dir, dir_oti->dir.inode_base + i, 999 | file_csum); 1000 | if (IS_ERR(inode)) 1001 | return ERR_CAST(inode); 1002 | 1003 | return d_splice_alias(inode, dentry); 1004 | } 1005 | } 1006 | 1007 | for (i = 0; i < n_dirs; i++) { 1008 | OtTreeDirRef treedir; 1009 | size_t name_len; 1010 | const char *name; 1011 | 1012 | if (!ot_arrayof_tree_dir_get_at (dirs, i, &treedir)) 1013 | continue; 1014 | 1015 | name = ot_tree_dir_get_name (treedir, &name_len); 1016 | if (name == NULL) 1017 | continue; 1018 | 1019 | if (dentry->d_name.len == name_len && 1020 | memcmp(dentry->d_name.name,name, name_len) == 0) { 1021 | OtChecksumRef tree_csum, meta_csum; 1022 | if (!ot_tree_dir_get_tree_checksum (treedir, &tree_csum) || 1023 | !ot_tree_dir_get_meta_checksum (treedir, &meta_csum)) 1024 | return ERR_PTR(-EIO); 1025 | 1026 | inode = otfs_make_dir_inode(dir->i_sb, dir, dir_oti->dir.inode_base + n_files + i, 1027 | tree_csum, meta_csum); 1028 | if (IS_ERR(inode)) 1029 | return ERR_CAST(inode); 1030 | 1031 | return d_splice_alias(inode, dentry); 1032 | } 1033 | } 1034 | 1035 | d_add(dentry, NULL); 1036 | return NULL; 1037 | } 1038 | 1039 | static 
int otfs_iterate(struct file *file, struct dir_context *ctx) 1040 | { 1041 | struct otfs_inode *oti; 1042 | struct otfs_info *fsi; 1043 | bool done = false; 1044 | size_t pos; 1045 | OtArrayofTreeFileRef files; 1046 | OtArrayofTreeDirRef dirs; 1047 | size_t i, n_files, n_dirs; 1048 | 1049 | fsi = file->f_inode->i_sb->s_fs_info; 1050 | oti = OTFS_I(file->f_inode); 1051 | 1052 | if (!ot_tree_meta_get_files (oti->dir.dirtree, &files)) 1053 | return -EIO; 1054 | n_files = ot_arrayof_tree_file_get_length (files); 1055 | 1056 | if (!ot_tree_meta_get_dirs (oti->dir.dirtree, &dirs)) 1057 | return -EIO; 1058 | n_dirs = ot_arrayof_tree_dir_get_length (dirs); 1059 | 1060 | /* Early exit if guaranteed past end */ 1061 | if (ctx->pos >= 2 + n_files + n_dirs) 1062 | return 0; 1063 | 1064 | if (!dir_emit_dots(file, ctx)) 1065 | return 0; 1066 | 1067 | /* pos 0 and 1 is dots, our entries start at 2 */ 1068 | pos = 2; 1069 | 1070 | /* First list files */ 1071 | for (i = 0; !done && i < n_files; i++) { 1072 | OtTreeFileRef treefile; 1073 | size_t name_len; 1074 | const char *name; 1075 | 1076 | if (!ot_arrayof_tree_file_get_at (files, i, &treefile)) 1077 | continue; 1078 | 1079 | name = ot_tree_file_get_name (treefile, &name_len); 1080 | if (name == NULL) 1081 | continue; 1082 | 1083 | if (pos++ == ctx->pos) { 1084 | if (dir_emit(ctx, name, name_len, oti->dir.inode_base + i, DT_UNKNOWN)) { 1085 | ctx->pos++; 1086 | } else { 1087 | done = true; /* no more */ 1088 | } 1089 | } 1090 | } 1091 | 1092 | /* Then dirs */ 1093 | for (i = 0; !done && i < n_dirs; i++) { 1094 | OtTreeDirRef treedir; 1095 | size_t name_len; 1096 | const char *name; 1097 | 1098 | if (!ot_arrayof_tree_dir_get_at (dirs, i, &treedir)) 1099 | continue; 1100 | 1101 | name = ot_tree_dir_get_name (treedir, &name_len); 1102 | if (name == NULL) 1103 | continue; 1104 | 1105 | if (pos++ == ctx->pos) { 1106 | if (dir_emit(ctx, name, name_len, oti->dir.inode_base + n_files + i, DT_DIR)) { 1107 | ctx->pos++; 1108 | } else { 
1109 | done = true; /* no more */ 1110 | } 1111 | } 1112 | } 1113 | 1114 | return 0; 1115 | } 1116 | 1117 | static loff_t otfs_dir_llseek(struct file *file, loff_t offset, int origin) 1118 | { 1119 | loff_t res = -EINVAL; 1120 | 1121 | switch (origin) { 1122 | case SEEK_CUR: 1123 | offset += file->f_pos; 1124 | break; 1125 | case SEEK_SET: 1126 | break; 1127 | default: 1128 | return res; 1129 | } 1130 | if (offset < 0) 1131 | return res; 1132 | 1133 | file->f_pos = offset; 1134 | 1135 | return offset; 1136 | } 1137 | 1138 | static ssize_t otfs_listxattr(struct dentry *dentry, char *names, size_t size) 1139 | { 1140 | struct inode *inode = d_inode(dentry); 1141 | struct otfs_inode *oti = OTFS_I(inode); 1142 | OtArrayofXattrRef xattrs; 1143 | size_t n_xattrs = 0; 1144 | size_t required_size = 0; 1145 | char *dest; 1146 | size_t i; 1147 | 1148 | if (ot_dir_meta_get_xattrs(oti->dirmeta, &xattrs)) 1149 | n_xattrs = ot_arrayof_xattr_get_length(xattrs); 1150 | 1151 | for (i = 0; i < n_xattrs; i++) { 1152 | OtXattrRef xattr; 1153 | if (ot_arrayof_xattr_get_at(xattrs, i, &xattr)) { 1154 | size_t name_len; 1155 | const u8 *name; 1156 | name = ot_xattr_get_name (xattr, &name_len); 1157 | if (name != NULL) 1158 | required_size += name_len; 1159 | } 1160 | } 1161 | if (size < required_size) 1162 | return -ERANGE; 1163 | dest = names; 1164 | for (i = 0; i < n_xattrs; i++) { 1165 | OtXattrRef xattr; 1166 | if (ot_arrayof_xattr_get_at(xattrs, i, &xattr)) { 1167 | size_t name_len; 1168 | const u8 *name; 1169 | name = ot_xattr_get_name (xattr, &name_len); 1170 | if (name != NULL) { 1171 | memcpy(dest, name, name_len); 1172 | dest += name_len; 1173 | } 1174 | } 1175 | } 1176 | 1177 | return required_size; 1178 | } 1179 | 1180 | static ssize_t otfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) 1181 | { 1182 | struct file *file = iocb->ki_filp; 1183 | struct file *realfile = file->private_data; 1184 | int ret; 1185 | 1186 | if (!realfile->f_op->read_iter) 1187 | return -ENODEV; 
1188 | 1189 | iocb->ki_filp = realfile; 1190 | ret = call_read_iter(realfile, iocb, iter); 1191 | iocb->ki_filp = file; 1192 | 1193 | return ret; 1194 | } 1195 | 1196 | static int otfs_mmap(struct file *file, struct vm_area_struct *vma) 1197 | { 1198 | struct file *realfile = file->private_data; 1199 | int ret; 1200 | 1201 | if (!realfile->f_op->mmap) 1202 | return -ENODEV; 1203 | 1204 | if (WARN_ON(file != vma->vm_file)) 1205 | return -EIO; 1206 | 1207 | vma_set_file(vma, realfile); 1208 | 1209 | ret = call_mmap(vma->vm_file, vma); 1210 | 1211 | return ret; 1212 | } 1213 | 1214 | static int otfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 1215 | { 1216 | struct file *realfile = file->private_data; 1217 | 1218 | return vfs_fadvise(realfile, offset, len, advice); 1219 | } 1220 | 1221 | static unsigned long otfs_mmu_get_unmapped_area(struct file *file, 1222 | unsigned long addr, 1223 | unsigned long len, 1224 | unsigned long pgoff, 1225 | unsigned long flags) 1226 | { 1227 | return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); 1228 | } 1229 | 1230 | static int otfs_release_file(struct inode *inode, struct file *file) 1231 | { 1232 | struct file *realfile = file->private_data; 1233 | 1234 | if (WARN_ON(realfile == NULL)) 1235 | return -EIO; 1236 | 1237 | fput(file->private_data); 1238 | file->private_data = NULL; 1239 | 1240 | return 0; 1241 | } 1242 | 1243 | static int otfs_open_file(struct inode *inode, struct file *file) 1244 | { 1245 | struct otfs_inode *oti = OTFS_I(inode); 1246 | struct inode *real_inode; 1247 | struct file *real_file; 1248 | int err; 1249 | 1250 | if (WARN_ON(file == NULL)) 1251 | return -EIO; 1252 | 1253 | /* Ensure no writes */ 1254 | if (file->f_flags & (O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_TRUNC)) 1255 | return -EROFS; 1256 | 1257 | /* don't pass these on to underlying fs */ 1258 | file->f_flags &= ~(O_NOCTTY); 1259 | 1260 | real_inode = d_inode(oti->nondir.path.dentry); 1261 | err = 
inode_permission(&init_user_ns, real_inode, MAY_OPEN); 1262 | if (err < 0) 1263 | return err; 1264 | 1265 | real_file = open_with_fake_path(&file->f_path, file->f_flags | OTFS_OPEN_FLAGS, real_inode, 1266 | current_cred()); 1267 | if (IS_ERR(real_file)) { 1268 | return PTR_ERR(real_file); 1269 | } 1270 | 1271 | file->private_data = real_file; 1272 | return 0; 1273 | } 1274 | 1275 | static const struct file_operations otfs_dir_operations = { 1276 | .open = otfs_dir_open, 1277 | .iterate = otfs_iterate, 1278 | .release = otfs_dir_release, 1279 | .llseek = otfs_dir_llseek, 1280 | }; 1281 | 1282 | static const struct inode_operations otfs_dir_inode_operations = { 1283 | .lookup = otfs_lookup, 1284 | }; 1285 | 1286 | static const struct inode_operations otfs_file_inode_operations = { 1287 | .setattr = simple_setattr, 1288 | .getattr = simple_getattr, 1289 | 1290 | .listxattr = otfs_listxattr, 1291 | }; 1292 | 1293 | static const struct file_operations otfs_file_operations = { 1294 | .read_iter = otfs_read_iter, 1295 | .mmap = otfs_mmap, 1296 | .fadvise = otfs_fadvise, 1297 | .fsync = noop_fsync, 1298 | .splice_read = generic_file_splice_read, 1299 | .llseek = generic_file_llseek, 1300 | .get_unmapped_area = otfs_mmu_get_unmapped_area, 1301 | .release = otfs_release_file, 1302 | .open = otfs_open_file, 1303 | }; 1304 | 1305 | static int otfs_fill_super(struct super_block *sb, struct fs_context *fc) 1306 | { 1307 | struct otfs_info *fsi = sb->s_fs_info; 1308 | struct file *f; 1309 | int ret; 1310 | int res; 1311 | u8 *commit_data = NULL; 1312 | struct inode *inode; 1313 | OtCommitRef commit; 1314 | OtChecksumRef root_contents; 1315 | OtChecksumRef root_metadata; 1316 | 1317 | if (sb->s_root) 1318 | return -EINVAL; 1319 | 1320 | /* These are required options */ 1321 | if (fsi->object_dir_path == NULL || 1322 | fsi->commit_id == NULL) 1323 | return -EINVAL; 1324 | 1325 | /* Set up the inode allocator early */ 1326 | sb->s_op = &otfs_ops; 1327 | sb->s_xattr = 
otfs_xattr_handlers; 1328 | sb->s_flags |= SB_RDONLY; 1329 | sb->s_magic = OTFS_MAGIC; 1330 | 1331 | f = filp_open(fsi->object_dir_path, O_PATH, 0); 1332 | if (IS_ERR(f)) { 1333 | ret = PTR_ERR(f); 1334 | goto fail; 1335 | } 1336 | fsi->object_dir = f->f_path; 1337 | path_get(&fsi->object_dir); 1338 | fput(f); 1339 | 1340 | res = otfs_read_object (&fsi->object_dir, fsi->commit_id, ".commit", &commit_data); 1341 | if (res < 0) { 1342 | ret = res; 1343 | goto fail; 1344 | } 1345 | 1346 | if (!ot_commit_from_data (commit_data, res, &commit) || 1347 | !ot_commit_get_root_contents (commit, &root_contents) || 1348 | !ot_commit_get_root_metadata (commit, &root_metadata)) { 1349 | ret = -EINVAL; 1350 | goto fail; 1351 | } 1352 | 1353 | /* 0 is root, so start at 1 */ 1354 | atomic64_set (&fsi->inode_counter, 1); 1355 | inode = otfs_make_dir_inode(sb, NULL, 0, 1356 | root_contents, root_metadata); 1357 | if (IS_ERR(inode)) { 1358 | ret = PTR_ERR(inode); 1359 | goto fail; 1360 | } 1361 | sb->s_root = d_make_root(inode); /* Takes ownership */ 1362 | 1363 | ret = -ENOMEM; 1364 | if (!sb->s_root) 1365 | goto fail; 1366 | 1367 | sb->s_maxbytes = MAX_LFS_FILESIZE; 1368 | sb->s_blocksize = PAGE_SIZE; 1369 | sb->s_blocksize_bits = PAGE_SHIFT; 1370 | 1371 | sb->s_time_gran = 1; 1372 | 1373 | vfree(commit_data); 1374 | return 0; 1375 | fail: 1376 | if (commit_data) 1377 | vfree(commit_data); 1378 | return ret; 1379 | } 1380 | 1381 | static int otfs_get_tree(struct fs_context *fc) 1382 | { 1383 | return get_tree_nodev(fc, otfs_fill_super); 1384 | } 1385 | 1386 | static const struct fs_context_operations otfs_context_ops = { 1387 | .parse_param = otfs_parse_param, 1388 | .get_tree = otfs_get_tree, 1389 | }; 1390 | 1391 | static int otfs_init_fs_context(struct fs_context *fc) 1392 | { 1393 | struct otfs_info *fsi; 1394 | 1395 | fsi = kzalloc(sizeof(*fsi), GFP_KERNEL); 1396 | if (!fsi) 1397 | return -ENOMEM; 1398 | 1399 | fc->s_fs_info = fsi; 1400 | fc->ops = &otfs_context_ops; 1401 | 
return 0; 1402 | } 1403 | 1404 | static struct file_system_type otfs_type = { 1405 | .name = "ostreefs", 1406 | .init_fs_context = otfs_init_fs_context, 1407 | .parameters = otfs_parameters, 1408 | .kill_sb = kill_anon_super, 1409 | .fs_flags = FS_USERNS_MOUNT, 1410 | }; 1411 | 1412 | static void otfs_inode_init_once(void *foo) 1413 | { 1414 | struct otfs_inode *oti = foo; 1415 | 1416 | inode_init_once(&oti->vfs_inode); 1417 | } 1418 | 1419 | static int __init init_otfs(void) 1420 | { 1421 | otfs_inode_cachep = kmem_cache_create("otfs_inode", 1422 | sizeof(struct otfs_inode), 0, 1423 | (SLAB_RECLAIM_ACCOUNT| 1424 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1425 | otfs_inode_init_once); 1426 | if (otfs_inode_cachep == NULL) 1427 | return -ENOMEM; 1428 | 1429 | return register_filesystem(&otfs_type); 1430 | } 1431 | 1432 | static void __exit exit_otfs(void) 1433 | { 1434 | unregister_filesystem(&otfs_type); 1435 | 1436 | /* Ensure all RCU free inodes are safe to be destroyed. */ 1437 | rcu_barrier(); 1438 | 1439 | kmem_cache_destroy(otfs_inode_cachep); 1440 | } 1441 | 1442 | module_init(init_otfs); 1443 | module_exit(exit_otfs); 1444 | --------------------------------------------------------------------------------