├── meson_options.txt
├── compression.h
├── libs
│   ├── crc32c.h
│   ├── raid56.h
│   ├── rbtree.h
│   ├── crc32c.c
│   ├── rbtree_augmented.h
│   ├── raid56.c
│   ├── list.h
│   └── rbtree.c
├── hash.h
├── messages.h
├── accessors.c
├── super.h
├── messages.c
├── LICENSE
├── ctree.h
├── hash.c
├── data.h
├── tests
│   ├── global.h
│   ├── corrupt.c
│   └── test.sh
├── meson.build
├── inode.h
├── metadata.h
├── volumes.h
├── README.md
├── compression.c
├── compat.h
├── main.c
├── inode.c
├── super.c
├── metadata.c
├── data.c
├── ondisk_format.h
└── accessors.h
/meson_options.txt: -------------------------------------------------------------------------------- 1 | option('tests', type : 'boolean', value : true, 2 | description : 'Build tests that require additional dependencies') 3 | -------------------------------------------------------------------------------- /compression.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_COMPRESSION_H 4 | #define BTRFS_FUSE_COMPRESSION_H 5 | 6 | #include "compat.h" 7 | #include "ctree.h" 8 | 9 | int btrfs_decompress(const struct btrfs_fs_info *fs_info, 10 | char *input, u32 input_len, 11 | char *output, u32 output_len, u8 compression); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /libs/crc32c.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0-or-later 2 | // 3 | #ifndef __CRC32C__ 4 | #define __CRC32C__ 5 | 6 | #include "compat.h" 7 | 8 | u32 crc32c_le(u32 seed, unsigned char const *data, size_t length); 9 | void crc32c_optimization_init(void); 10 | 11 | #define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length) 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /hash.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_HASH_H 4 | #define BTRFS_FUSE_HASH_H 5 | 6 | #include "compat.h" 7 | #include "libs/crc32c.h" 8 | 9 | int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len); 10 | 11 | static inline u64 btrfs_name_hash(const char *name, int len) 12 | { 13 | return crc32c((u32)~1, name, len); 14 | } 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /messages.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_MESSAGES_H 4 | #define BTRFS_FUSE_MESSAGES_H 5 | 6 | __attribute__ ((format (printf, 1, 2))) 7 | void error(const char *fmt, ...); 8 | 9 | __attribute__ ((format (printf, 1, 2))) 10 | void warning(const char *fmt, ...); 11 | 12 | __attribute__ ((format (printf, 1, 2))) 13 | void info(const char *fmt, ...); 14 | 15 | __attribute__ ((format (printf, 1, 2))) 16 | void debug(const char *fmt, ...); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /accessors.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #include "accessors.h" 4 | 5 | static const struct btrfs_csum { 6 | u16 size; 7 | } btrfs_csums[] = { 8 | [BTRFS_CSUM_TYPE_CRC32] = { 4 }, 9 | [BTRFS_CSUM_TYPE_XXHASH] = { 8 }, 10 | [BTRFS_CSUM_TYPE_SHA256] = { 32 }, 11 | [BTRFS_CSUM_TYPE_BLAKE2] = { 32 }, 12 | }; 13 | 14 | u16 btrfs_super_csum_size(const 
struct btrfs_super_block *sb) 15 | { 16 | const u16 csum_type = btrfs_super_csum_type(sb); 17 | 18 | return btrfs_csums[csum_type].size; 19 | } 20 | 21 | size_t btrfs_super_num_csums(void) 22 | { 23 | return ARRAY_SIZE(btrfs_csums); 24 | } 25 | -------------------------------------------------------------------------------- /super.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_SUPER_H 4 | #define BTRFS_FUSE_SUPER_H 5 | 6 | #include "ctree.h" 7 | #include "volumes.h" 8 | 9 | /* 10 | * Read directly from physical @offset on disk. 11 | * 12 | * This is only used by the superblock, which is not chunk mapped. 13 | */ 14 | int btrfs_read_from_disk(int fd, char *buf, u64 offset, u32 len); 15 | 16 | int btrfs_check_super(struct btrfs_super_block *sb); 17 | 18 | /* The equivalent of open_ctree() of kernel/progs */ 19 | struct btrfs_fs_info *btrfs_mount(const char *path); 20 | 21 | /* The equivalent of close_ctree() of kernel/progs */ 22 | void btrfs_unmount(struct btrfs_fs_info *fs_info); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /messages.c: -------------------------------------------------------------------------------- 1 | #include <stdarg.h> 2 | #include <stdio.h> 3 | #include "messages.h" 4 | 5 | __attribute__ ((format (printf, 1, 2))) 6 | void error(const char *fmt, ...) 7 | { 8 | va_list args; 9 | 10 | fputs("ERROR: ", stderr); 11 | va_start(args, fmt); 12 | vfprintf(stderr, fmt, args); 13 | va_end(args); 14 | fputc('\n', stderr); 15 | } 16 | 17 | __attribute__ ((format (printf, 1, 2))) 18 | void warning(const char *fmt, ...) 19 | { 20 | va_list args; 21 | 22 | fputs("WARNING: ", stderr); 23 | va_start(args, fmt); 24 | vfprintf(stderr, fmt, args); 25 | va_end(args); 26 | fputc('\n', stderr); 27 | } 28 | 29 | __attribute__ ((format (printf, 1, 2))) 30 | void info(const char *fmt, ...) 31 | { 32 | va_list args; 33 | 34 | fputs("INFO: ", stdout); 35 | va_start(args, fmt); 36 | vprintf(fmt, args); 37 | va_end(args); 38 | putchar('\n'); 39 | } 40 | 41 | __attribute__ ((format (printf, 1, 2))) 42 | void debug(const char *fmt, ...) 43 | { 44 | #ifdef DEBUG 45 | va_list args; 46 | 47 | fputs("DEBUG: ", stdout); 48 | va_start(args, fmt); 49 | vprintf(fmt, args); 50 | va_end(args); 51 | putchar('\n'); 52 | #endif 53 | } 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The following files are used under their own license, mostly GPLv2+ or GPLv2 2 | only: 3 | 4 | libs/*.[ch] 5 | 6 | 7 | All other files are used under the terms of the MIT license. 8 | 9 | Copyright 2021 Qu Wenruo 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of 12 | this software and associated documentation files (the "Software"), to deal in 13 | the Software without restriction, including without limitation the rights to 14 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 15 | of the Software, and to permit persons to whom the Software is furnished to do 16 | so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in all 19 | copies or substantial portions of the Software. 
20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | SOFTWARE. 28 | -------------------------------------------------------------------------------- /ctree.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_CTREE_H 4 | #define BTRFS_FUSE_CTREE_H 5 | 6 | #include <pthread.h> 7 | #include "accessors.h" 8 | #include "libs/rbtree.h" 9 | #include "libs/list.h" 10 | 11 | #define BTRFS_UUID_UNPARSED_SIZE 37 12 | 13 | struct btrfs_root { 14 | struct extent_buffer *node; 15 | struct btrfs_key root_key; 16 | struct btrfs_fs_info *fs_info; 17 | struct rb_node rb_node; 18 | 19 | /* 20 | * Indicates the first inode number, which is also the inode for the 21 | * subvolume root. 22 | */ 23 | u64 root_dirid; 24 | }; 25 | 26 | /* Represents a btrfs filesystem */ 27 | struct btrfs_fs_info { 28 | u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; 29 | u8 fsid[BTRFS_UUID_SIZE]; 30 | 31 | struct btrfs_root *tree_root; 32 | struct btrfs_root *default_root; 33 | struct btrfs_root *chunk_root; 34 | struct btrfs_root *csum_root; 35 | 36 | /* Records all subvolume trees that are in use */ 37 | struct rb_root subvols_root; 38 | 39 | /* Records logical->physical mappings */ 40 | struct rb_root mapping_root; 41 | 42 | /* Records all extent_buffers */ 43 | struct rb_root eb_root; 44 | pthread_mutex_t eb_lock; 45 | 46 | /* Cached generation, the same as superblock::generation */ 47 | u64 generation; 48 | 49 | /* Cached basic sizes */ 50 | u32 nodesize; 51 | u32 sectorsize; 52 | u16 csum_size; 53 | u16 csum_type; 54 | struct btrfs_fs_devices *fs_devices; 55 | struct btrfs_super_block super_copy; 56 | }; 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /hash.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | 4 | #include <openssl/sha.h> 5 | #include <blake2.h> 6 | #include <xxhash.h> 7 | #include "ondisk_format.h" 8 | #include "messages.h" 9 | #include "libs/crc32c.h" 10 | 11 | static int hash_crc32c(const u8* buf, size_t length, u8 *out) 12 | { 13 | u32 crc = ~0; 14 | 15 | crc = crc32c(~0, buf, length); 16 | put_unaligned_le32(~crc, out); 17 | 18 | return 0; 19 | } 20 | 21 | static int hash_xxhash(const u8* buf, size_t length, u8 *out) 22 | { 23 | XXH64_hash_t hash; 24 | 25 | hash = XXH64(buf, length, 0); 26 | put_unaligned_le64(hash, out); 27 | 28 | return 0; 29 | } 30 | 31 | static int hash_sha256(const u8* buf, size_t length, u8 *out) 32 | { 33 | SHA256(buf, length, out); 34 | 35 | return 0; 36 | } 37 | 38 | static int hash_blake2b(const u8* buf, size_t length, u8 *out) 39 | { 40 | blake2b_state S; 41 | 42 | blake2b_init(&S, BTRFS_CSUM_SIZE); 43 | blake2b_update(&S, buf, length); 44 | blake2b_final(&S, out, BTRFS_CSUM_SIZE); 45 | 46 | return 0; 47 | } 48 | 49 | int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len) 50 | { 51 | memset(out, 0, BTRFS_CSUM_SIZE); 52 | 53 | switch (csum_type) { 54 | case BTRFS_CSUM_TYPE_CRC32: 55 | return hash_crc32c(data, len, out); 56 | case BTRFS_CSUM_TYPE_XXHASH: 57 | return 
hash_xxhash(data, len, out); 58 | case BTRFS_CSUM_TYPE_SHA256: 59 | return hash_sha256(data, len, out); 60 | case BTRFS_CSUM_TYPE_BLAKE2: 61 | return hash_blake2b(data, len, out); 62 | default: 63 | error("unknown csum type: %d", csum_type); 64 | assert(0); 65 | } 66 | return -1; 67 | } 68 | -------------------------------------------------------------------------------- /data.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_DATA_H 4 | #define BTRFS_FUSE_DATA_H 5 | 6 | #include "metadata.h" 7 | 8 | /* 9 | * Try to locate one csum item for @bytenr. 10 | * 11 | * Return btrfs_csum_item pointer to the csum item (must be used with 12 | * path->nodes[0]). 13 | * Return ERR_PTR() for error (including no csum found). 14 | * For the ERR_PTR(-ENOENT) case, path will point to the nearest item after 15 | * @bytenr, in case the caller wants to know where the next csum starts. 16 | * 17 | * Thus the caller should check path->nodes[0] and release the path accordingly. 18 | */ 19 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_fs_info *fs_info, 20 | struct btrfs_path *path, 21 | u64 bytenr); 22 | 23 | /* 24 | * Read data from btrfs logical address @logical. 25 | * 26 | * Will do a csum check and try to find a copy which passes the checksum (if 27 | * there is one). 28 | * 29 | * Return >0 for the number of bytes read from disk that pass the checksum 30 | * (if there is one). 31 | * Return <0 for error. 32 | * 33 | * Thus if we have the following on-disk data layout: 34 | * 35 | * X X+4K X+8K 36 | * Mirror 1 |XXXXXXX| | 37 | * Mirror 2 | |XXXXXXX| 38 | * 39 | * Where X means corrupted data. 40 | * 41 | * Then we call btrfs_read_data(fs_info, buf, 8192, X); 42 | * 43 | * We will get the return value 4096, with correct data from mirror 2, 44 | * then we still need to call btrfs_read_data(fs_info, buf + 4096, 4096, 45 | * X + 4096) to read the next 4K correctly from mirror 1. 46 | */ 47 | ssize_t btrfs_read_data(struct btrfs_fs_info *fs_info, char *buf, 48 | size_t num_bytes, u64 logical); 49 | 50 | /* 51 | * Read data at @file_offset of @inode into @buf. 52 | * 53 | * @file_offset and @num_bytes must be fs_info->sectorsize aligned. 54 | */ 55 | ssize_t btrfs_read_file(struct btrfs_fs_info *fs_info, 56 | struct btrfs_inode *inode, u64 file_offset, 57 | char *buf, u32 num_bytes); 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /tests/global.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* 3 | * Copyright (c) 2000-2003 Silicon Graphics, Inc. 4 | * All Rights Reserved. 
5 | */ 6 | 7 | #ifndef GLOBAL_H 8 | #define GLOBAL_H 9 | 10 | #ifdef HAVE_XFS_XFS_H 11 | #include <xfs/xfs.h> 12 | #endif 13 | 14 | #ifdef HAVE_XFS_LIBXFS_H 15 | #include <xfs/libxfs.h> 16 | #endif 17 | 18 | #ifdef HAVE_XFS_JDM_H 19 | #include <xfs/jdm.h> 20 | #endif 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | 47 | #ifndef FALLOC_FL_KEEP_SIZE 48 | #define FALLOC_FL_KEEP_SIZE 0x01 49 | #endif 50 | 51 | #ifndef FALLOC_FL_PUNCH_HOLE 52 | #define FALLOC_FL_PUNCH_HOLE 0x02 53 | #endif 54 | 55 | #ifndef FALLOC_FL_NO_HIDE_STALE 56 | #define FALLOC_FL_NO_HIDE_STALE 0x04 57 | #endif 58 | 59 | #ifndef FALLOC_FL_COLLAPSE_RANGE 60 | #define FALLOC_FL_COLLAPSE_RANGE 0x08 61 | #endif 62 | 63 | #ifndef FALLOC_FL_ZERO_RANGE 64 | #define FALLOC_FL_ZERO_RANGE 0x10 65 | #endif 66 | 67 | #ifndef FALLOC_FL_INSERT_RANGE 68 | #define FALLOC_FL_INSERT_RANGE 0x20 69 | #endif 70 | 71 | #include 72 | 73 | static inline unsigned long long 74 | rounddown_64(unsigned long long x, unsigned int y) 75 | { 76 | x /= y; 77 | return x * y; 78 | } 79 | 80 | static inline unsigned long long 81 | roundup_64(unsigned long long x, unsigned int y) 82 | { 83 | return rounddown_64(x + y - 1, y); 84 | } 85 | 86 | #endif /* GLOBAL_H */ 87 | -------------------------------------------------------------------------------- /tests/corrupt.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <unistd.h> 4 | #include <fcntl.h> 5 | #include <errno.h> 6 | #include <time.h> 7 | #include <sys/stat.h> 8 | #include <sys/ioctl.h> 9 | #include <linux/fs.h> 10 | 11 | static void usage(const char *name) 12 | { 13 | printf("usage: %s <file|device>\n", name); 14 | exit(1); 15 | } 16 | 17 | #define BUF_SIZE 8 18 | int main(int argc, char *argv[]) 19 | { 20 | struct stat stat_buf; 21 | char *path; 22 | char data_buf[BUF_SIZE]; 23 | off_t size; 24 | int i; 25 | int fd; 26 | int ret; 27 | 28 | if (argc != 2) 29 | usage(argv[0]); 30 | 31 | path = argv[1]; 32 | fd = open(path, O_RDWR); 33 | if (fd < 0) { 34 | ret = -errno; 35 | fprintf(stderr, "failed to open file %s: %d\n", path, ret); 36 | return 1; 37 | } 38 | ret = fstat(fd, &stat_buf); 39 | if (ret < 0) { 40 | ret = -errno; 41 | fprintf(stderr, "failed to stat file %s: %d\n", path, ret); 42 | goto out; 43 | } 44 | if (S_ISREG(stat_buf.st_mode)) { 45 | size = stat_buf.st_size; 46 | } else if (S_ISBLK(stat_buf.st_mode)) { 47 | ret = ioctl(fd, BLKGETSIZE64, &size); 48 | if (ret < 0) { 49 | ret = -errno; 50 | fprintf(stderr, "failed to get block dev size %s: %d\n", 51 | path, ret); 52 | goto out; 53 | } 54 | } else { 55 | ret = -EINVAL; 56 | fprintf(stderr, "%s is not a regular file or block device\n", 57 | path); 58 | goto out; 59 | } 60 | if (size <= 1024 * 1024) { 61 | ret = -EINVAL; 62 | fprintf(stderr, "file %s is too small\n", path); 63 | goto out; 64 | } 65 | srand(time(NULL)); 66 | 67 | /* Corrupt 1/16 of the file */ 68 | for (i = 0; i < (size / 16); i += BUF_SIZE) { 69 | off_t dest_off = rand() % (size - 1024 * 1024); 70 | int j; 71 | 72 | /* Now dest_off is always beyond the first 1MB */ 73 | dest_off += 1024 * 1024; 74 | 75 | for (j = 0; j < BUF_SIZE; j++) 76 | data_buf[j] = rand() % 256; 77 | 78 | ret = pwrite(fd, data_buf, BUF_SIZE, dest_off); 79 | if (ret != BUF_SIZE) { 80 | ret = -EIO; 81 | fprintf(stderr, "failed to write data into %s\n", path); 82 | goto 
out; 83 | } 84 | } 85 | out: 86 | close(fd); 87 | if (ret < 0) 88 | return 1; 89 | printf("%s is corrupted with %u bytes\n", path, i); 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /libs/raid56.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0-only 2 | 3 | /* 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public 6 | * License v2 as published by the Free Software Foundation. 7 | * 8 | * This program is distributed in the hope that it will be useful, 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | * General Public License for more details. 12 | * 13 | * You should have received a copy of the GNU General Public 14 | * License along with this program; if not, write to the 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 | * Boston, MA 021110-1307, USA. 17 | */ 18 | 19 | /* 20 | * Original headers from kernel library for RAID5/6 calculations, not from 21 | * btrfs kernel header. 22 | */ 23 | 24 | #ifndef __BTRFS_PROGS_RAID56_H__ 25 | #define __BTRFS_PROGS_RAID56_H__ 26 | 27 | void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs); 28 | int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data); 29 | 30 | /* 31 | * Headers synchronized from kernel include/linux/raid/pq.h 32 | * No modification at all. 33 | * 34 | * Galois field tables. 35 | */ 36 | extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256))); 37 | extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256))); 38 | extern const u8 raid6_gfexp[256] __attribute__((aligned(256))); 39 | extern const u8 raid6_gfinv[256] __attribute__((aligned(256))); 40 | extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); 41 | 42 | /* Recover raid6 with 2 data stripes corrupted */ 43 | int raid6_recov_data2(int nr_devs, size_t stripe_len, int dest1, int dest2, 44 | void **data); 45 | 46 | /* Recover data and P */ 47 | int raid6_recov_datap(int nr_devs, size_t stripe_len, int dest1, void **data); 48 | 49 | /* 50 | * Recover raid56 data 51 | * @dest1/2 can be -1 to indicate correct data 52 | * 53 | * Return >0 for unrecoverable case. 
54 |  * Return 0 for recoverable case, and recovered data will be stored into @data 55 |  * Return <0 for fatal error 56 |  */ 57 | int raid56_recov(int nr_devs, size_t stripe_len, u64 profile, int dest1, 58 | int dest2, void **data); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project('btrfs-fuse', 'c') 2 | 3 | if get_option('buildtype').startswith('debug') 4 | add_project_arguments('-DDEBUG', language : ['c']) 5 | endif 6 | btrfs_fuse_src = ['accessors.c', 'hash.c', 'main.c', 'messages.c', 'metadata.c', 7 | 'super.c', 'volumes.c', 'inode.c', 'data.c', 'compression.c', 8 | 'libs/crc32c.c', 'libs/rbtree.c', 'libs/raid56.c', 'libs/tables.c'] 9 | 10 | cc = meson.get_compiler('c') 11 | 12 | uuid_dep = dependency('uuid') 13 | 14 | # Hash dependency 15 | blake2_dep = dependency('libb2') 16 | crypto_dep = dependency('libcrypto') 17 | xxhash_dep = dependency('libxxhash') 18 | hash_deps = [blake2_dep, crypto_dep, xxhash_dep] 19 | 20 | # Compression dependency 21 | zlib_dep = dependency('zlib') 22 | # not using pkgconfig for lzo as older versions do not ship a definition 23 | lzo_dep = cc.find_library('lzo2', has_headers: ['lzo/lzo2a.h']) 24 | zstd_dep = dependency('libzstd') 25 | compression_deps = [zlib_dep, lzo_dep, zstd_dep] 26 | 27 | fuse_dep = dependency('fuse3') 28 | 29 | # For PPC64/MIPS64, which have u64 defined as unsigned long and can 30 | # cause warnings for print formats. 31 | # Use this macro to get the more common u64 == unsigned long long. 32 | if host_machine.cpu_family() == 'ppc64' or host_machine.cpu_family() == 'mips64' 33 | add_project_arguments('-D__SANE_USERSPACE_TYPES__', language : ['c']) 34 | endif 35 | 36 | btrfs_fuse_deps = [uuid_dep, hash_deps, compression_deps, fuse_dep] 37 | executable('btrfs-fuse', btrfs_fuse_src, dependencies: btrfs_fuse_deps, 38 | install: true) 39 | 40 | want_tests = get_option('tests') 41 | 42 | # The rest are all for selftests 43 | # 44 | # Btrfs-progs is a hard requirement so that fsstress can create subvolumes and 45 | # snapshots. Without that we can't test name resolution for subvolumes. 
46 | btrfs_dep = dependency('libbtrfsutil', required: want_tests) 47 | test_args = ['-D_GNU_SOURCE', '-D_FILE_OFFSET_BITS=64'] 48 | if btrfs_dep.found() 49 | test_args += ['-DHAVE_BTRFSUTIL_H'] 50 | endif 51 | 52 | if cc.has_header('linux/fiemap.h', required: want_tests) 53 | test_args += ['-DHAVE_LINUX_FIEMAP_H'] 54 | endif 55 | 56 | if cc.has_header('sys/prctl.h', required: want_tests) 57 | test_args += ['-DHAVE_SYS_PRCTL_H'] 58 | endif 59 | 60 | aio_dep = cc.find_library('aio', has_headers: ['libaio.h'], required: want_tests) 61 | if aio_dep.found() 62 | test_args += ['-DAIO'] 63 | endif 64 | 65 | uring_dep = dependency('liburing', required: want_tests) 66 | if uring_dep.found() 67 | test_args += ['-DURING'] 68 | endif 69 | 70 | if cc.has_function('renameat2') 71 | test_args += ['-DHAVE_RENAMEAT2'] 72 | endif 73 | 74 | if cc.has_header('xfs/xfs.h', required: want_tests) 75 | test_args += ['-DHAVE_XFS_XFS_H'] 76 | endif 77 | if cc.has_header('xfs/jdm.h', required: want_tests) 78 | test_args += ['-DHAVE_XFS_JDM_H'] 79 | endif 80 | 81 | test_deps = [btrfs_dep, aio_dep, uring_dep] 82 | executable('fsstress', 'tests/fsstress.c', c_args: test_args, 83 | dependencies: test_deps, install: false) 84 | executable('fssum', 'tests/fssum.c', c_args: test_args, 85 | dependencies: [test_deps, crypto_dep], install: false) 86 | executable('corrupt', 'tests/corrupt.c', c_args: test_args, 87 | dependencies: [test_deps, crypto_dep], install: false) 88 | -------------------------------------------------------------------------------- /inode.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_INODE_H 4 | #define BTRFS_FUSE_INODE_H 5 | 6 | #include <sys/stat.h> 7 | #include 8 | #include "compat.h" 9 | #include "ondisk_format.h" 10 | #include "ctree.h" 11 | #include "metadata.h" 12 | 13 | /* 14 | * Represent one inode in a btrfs. 15 | * 16 | * Since each subvolume is a separate inode space, we can have the same inode 17 | * numbers in different subvolumes. 18 | * 19 | * Thus in btrfs, to locate one inode we need (subvolid, inode), not just the 20 | * inode number. 21 | */ 22 | struct btrfs_inode { 23 | struct btrfs_root *root; 24 | u64 ino; 25 | 26 | /* File type, indicated using BTRFS_FT_* numbers */ 27 | u8 file_type; 28 | }; 29 | 30 | /* 31 | * Look up one name inside @dir. 32 | * 33 | * NOTE: @name should not contain '/', thus it's really just one name, not 34 | * a complete path. 35 | * 36 | * The result will be put into @inode_ret, which can be either on-stack or 37 | * allocated memory. (This applies to all @inode_ret in the header) 38 | */ 39 | int btrfs_lookup_one_name(struct btrfs_fs_info *fs_info, 40 | struct btrfs_inode *dir, const char *name, 41 | size_t name_len, struct btrfs_inode *inode_ret); 42 | 43 | /* 44 | * Resolve a full path. 45 | * 46 | * NOTE: the path should not contain soft links (or ".." or "."), and should be 47 | * an absolute path (starting with '/'). 48 | * This is ensured by FUSE already. 49 | */ 50 | int btrfs_resolve_path(struct btrfs_fs_info *fs_info, 51 | const char *path, size_t path_len, 52 | struct btrfs_inode *inode_ret); 53 | 54 | /* 55 | * Read the softlink destination into @output. 56 | * 57 | * @inode must be a soft link. 58 | * 59 | * Return >0 for the size of the content read (not including 60 | * the trailing '\0') 61 | * Return <0 for error. 62 | * In no case will it return 0. 
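 *
 * A typical caller NUL-terminates the result itself; a minimal sketch
 * (the PATH_MAX-sized buffer is illustrative, not mandated by this API):
 *
 *	char buf[PATH_MAX];
 *	int ret;
 *
 *	ret = btrfs_read_link(fs_info, inode, buf, sizeof(buf) - 1);
 *	if (ret > 0)
 *		buf[ret] = '\0';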
63 | */ 64 | int btrfs_read_link(struct btrfs_fs_info *fs_info, 65 | struct btrfs_inode *inode, char *output, 66 | size_t output_size); 67 | 68 | struct btrfs_iterate_dir_ctrl { 69 | struct btrfs_path path; 70 | struct btrfs_inode dir; 71 | struct btrfs_key_range range; 72 | }; 73 | 74 | /* 75 | * Interfaces to iterate one dir. 76 | * 77 | * The common usage would be: 78 | * 79 | * struct btrfs_iterate_dir_ctrl ctrl = {}; 80 | * int ret; 81 | * 82 | * ret = btrfs_iterate_dir_start(fs_info, &ctrl, dir, 0); // start from index 0 83 | * 84 | * while (ret == 0) { 85 | * struct btrfs_inode found_entry; 86 | * char name_buf[NAME_MAX] = {}; 87 | * size_t name_len; 88 | * u64 index; 89 | * 90 | * ret = btrfs_iterate_dir_get_inode(fs_info, &ctrl, 91 | * &found_entry, &index, name_buf, &name_len); 92 | * // Do something using found_entry/name_buf. 93 | * 94 | * ret = btrfs_iterate_dir_next(fs_info, &ctrl); 95 | * } 96 | * btrfs_iterate_dir_end(fs_info, &ctrl); 97 | */ 98 | int btrfs_iterate_dir_start(struct btrfs_fs_info *fs_info, 99 | struct btrfs_iterate_dir_ctrl *ctrl, 100 | const struct btrfs_inode *dir, u64 start_index); 101 | 102 | int btrfs_iterate_dir_get_inode(struct btrfs_fs_info *fs_info, 103 | struct btrfs_iterate_dir_ctrl *ctrl, 104 | struct btrfs_inode *entry, 105 | u64 *index_ret, char *name, size_t *name_len); 106 | 107 | static inline int btrfs_iterate_dir_next(struct btrfs_fs_info *fs_info, 108 | struct btrfs_iterate_dir_ctrl *ctrl) 109 | { 110 | return btrfs_search_keys_next(&ctrl->path, &ctrl->range); 111 | } 112 | 113 | static inline void btrfs_iterate_dir_end(struct btrfs_fs_info *fs_info, 114 | struct btrfs_iterate_dir_ctrl *ctrl) 115 | { 116 | btrfs_release_path(&ctrl->path); 117 | } 118 | 119 | static inline u32 btrfs_type_to_imode(u8 btrfs_ft) 120 | { 121 | static const u32 imode_by_btrfs_type[] = { 122 | [BTRFS_FT_REG_FILE] = S_IFREG, 123 | [BTRFS_FT_DIR] = S_IFDIR, 124 | [BTRFS_FT_CHRDEV] = S_IFCHR, 125 | [BTRFS_FT_BLKDEV] = S_IFBLK, 126 | [BTRFS_FT_FIFO] = S_IFIFO, 127 | [BTRFS_FT_SOCK] = S_IFSOCK, 128 | [BTRFS_FT_SYMLINK] = S_IFLNK, 129 | }; 130 | 131 | return imode_by_btrfs_type[btrfs_ft]; 132 | } 133 | 134 | int btrfs_stat(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode, 135 | struct stat *stbuf); 136 | #endif 137 | -------------------------------------------------------------------------------- /libs/rbtree.h: -------------------------------------------------------------------------------- 1 | /* 2 | Red Black Trees 3 | (C) 1999 Andrea Arcangeli 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program; if not, write to the Free Software 17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | 19 | linux/include/linux/rbtree.h 20 | 21 | To use rbtrees you'll have to implement your own insert and search cores. 22 | This will avoid us to use callbacks and to drop dramatically performances. 23 | I know it's not the cleaner way, but in C (not in C++) to get 24 | performances and genericity... 
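  A minimal search core under this scheme could look like the sketch
  below (struct mynode and its 'key' field are illustrative); an insert
  core walks the tree the same way, then calls rb_link_node() and
  rb_insert_color() on the slot it found:

	struct mynode {
		struct rb_node rb;
		u64 key;
	};

	static struct mynode *my_search(struct rb_root *root, u64 key)
	{
		struct rb_node *n = root->rb_node;

		while (n) {
			struct mynode *cur = rb_entry(n, struct mynode, rb);

			if (key < cur->key)
				n = n->rb_left;
			else if (key > cur->key)
				n = n->rb_right;
			else
				return cur;
		}
		return NULL;
	}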
25 | 26 | See Documentation/rbtree.txt for documentation and samples. 27 | */ 28 | 29 | #ifndef _LINUX_RBTREE_H 30 | #define _LINUX_RBTREE_H 31 | 32 | #include 33 | 34 | struct rb_node { 35 | unsigned long __rb_parent_color; 36 | struct rb_node *rb_right; 37 | struct rb_node *rb_left; 38 | } __attribute__((aligned(sizeof(long)))); 39 | /* The alignment might seem pointless, but allegedly CRIS needs it */ 40 | 41 | struct rb_root { 42 | struct rb_node *rb_node; 43 | }; 44 | 45 | 46 | #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) 47 | 48 | #define RB_ROOT (struct rb_root) { NULL, } 49 | #define rb_entry(ptr, type, member) container_of(ptr, type, member) 50 | 51 | #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) 52 | 53 | /* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ 54 | #define RB_EMPTY_NODE(node) \ 55 | ((node)->__rb_parent_color == (unsigned long)(node)) 56 | #define RB_CLEAR_NODE(node) \ 57 | ((node)->__rb_parent_color = (unsigned long)(node)) 58 | 59 | 60 | extern void rb_insert_color(struct rb_node *, struct rb_root *); 61 | extern void rb_erase(struct rb_node *, struct rb_root *); 62 | 63 | 64 | /* Find logical next and previous nodes in a tree */ 65 | extern struct rb_node *rb_next(const struct rb_node *); 66 | extern struct rb_node *rb_prev(const struct rb_node *); 67 | extern struct rb_node *rb_first(const struct rb_root *); 68 | extern struct rb_node *rb_last(const struct rb_root *); 69 | 70 | /* Postorder iteration - always visit the parent after its children */ 71 | extern struct rb_node *rb_first_postorder(const struct rb_root *); 72 | extern struct rb_node *rb_next_postorder(const struct rb_node *); 73 | 74 | /* Fast replacement of a single node without remove/rebalance/add/rebalance */ 75 | extern void rb_replace_node(struct rb_node *victim, struct rb_node *new_node, 76 | struct rb_root *root); 77 | 78 | static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, 79 | struct rb_node ** rb_link) 80 | { 81 | node->__rb_parent_color = (unsigned long)parent; 82 | node->rb_left = node->rb_right = NULL; 83 | 84 | *rb_link = node; 85 | } 86 | 87 | #define rb_entry_safe(ptr, type, member) \ 88 | ({ typeof(ptr) ____ptr = (ptr); \ 89 | ____ptr ? rb_entry(____ptr, type, member) : NULL; \ 90 | }) 91 | 92 | /** 93 | * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of 94 | * given type safe against removal of rb_node entry 95 | * 96 | * @pos: the 'type *' to use as a loop cursor. 97 | * @n: another 'type *' to use as temporary storage 98 | * @root: 'rb_root *' of the rbtree. 99 | * @field: the name of the rb_node field within 'type'. 
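 *
 * Commonly used to free a whole tree in one pass (sketch; struct mytype
 * and its 'rb' member are illustrative):
 *
 *	struct mytype *pos, *n;
 *
 *	rbtree_postorder_for_each_entry_safe(pos, n, &root, rb)
 *		free(pos);
 *
 * Post-order guarantees each node is visited after its children, so
 * freeing @pos never touches a node the iteration still needs.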
100 | */ 101 | #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ 102 | for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ 103 | pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ 104 | typeof(*pos), field); 1; }); \ 105 | pos = n) 106 | 107 | #endif /* _LINUX_RBTREE_H */ 108 | -------------------------------------------------------------------------------- /metadata.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_METADATA_H 4 | #define BTRFS_FUSE_METADATA_H 5 | 6 | #include <pthread.h> 7 | #include "ctree.h" 8 | 9 | struct btrfs_path { 10 | struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; 11 | int slots[BTRFS_MAX_LEVEL]; 12 | }; 13 | 14 | /* Specify a key range to search */ 15 | struct btrfs_key_range { 16 | /* The search range must have the same objectid */ 17 | u64 objectid; 18 | 19 | /* Result slots will have @type_start <= key.type <= @type_end */ 20 | u8 type_start; 21 | u8 type_end; 22 | 23 | /* Result slots will have @offset_start <= key.offset <= @offset_end */ 24 | u64 offset_start; 25 | u64 offset_end; 26 | }; 27 | 28 | void btrfs_init_path(struct btrfs_path *path); 29 | void btrfs_release_path(struct btrfs_path *path); 30 | static inline int btrfs_comp_cpu_keys(const struct btrfs_key *key1, 31 | const struct btrfs_key *key2) 32 | { 33 | if (key1->objectid > key2->objectid) 34 | return 1; 35 | if (key1->objectid < key2->objectid) 36 | return -1; 37 | if (key1->type > key2->type) 38 | return 1; 39 | if (key1->type < key2->type) 40 | return -1; 41 | if (key1->offset > key2->offset) 42 | return 1; 43 | if (key1->offset < key2->offset) 44 | return -1; 45 | return 0; 46 | } 47 | 48 | static inline struct extent_buffer *extent_buffer_get(struct extent_buffer *eb) 49 | { 50 | pthread_mutex_lock(&eb->fs_info->eb_lock); 51 | eb->refs++; 52 | pthread_mutex_unlock(&eb->fs_info->eb_lock); 53 | return eb; 54 | } 55 | 56 | void free_extent_buffer(struct extent_buffer *eb); 57 | 58 | /* 59 | * Read a tree block at logical bytenr @logical. 60 | * 61 | * @logical: The logical bytenr where the tree block should be. 62 | * @level: The level the tree block should have. 63 | * @transid: The transid the tree block should have. 64 | * @first_key: The first key the tree block should have. 65 | * (optional, NULL to skip this check) 66 | * 67 | * Return ERR_PTR for error. 68 | * Return eb if read succeeded. 69 | */ 70 | struct extent_buffer *btrfs_read_tree_block(struct btrfs_fs_info *fs_info, 71 | u64 logical, u8 level, u64 transid, 72 | struct btrfs_key *first_key); 73 | 74 | static inline bool is_fstree(u64 rootid) 75 | { 76 | return (rootid == BTRFS_FS_TREE_OBJECTID) || 77 | (rootid >= BTRFS_FIRST_FREE_OBJECTID && 78 | rootid < BTRFS_LAST_FREE_OBJECTID); 79 | } 80 | 81 | struct btrfs_root *btrfs_read_root(struct btrfs_fs_info *fs_info, u64 rootid); 82 | 83 | /* 84 | * Go to next sibling leaf 85 | * 86 | * Return 0 if next sibling leaf found and update @path. 87 | * Return >0 if no more next leaf. 88 | * Return <0 for error. 89 | */ 90 | int btrfs_next_leaf(struct btrfs_path *path); 91 | 92 | /* 93 | * This is the equivalent of kernel/progs btrfs_search_slot(), without the CoW 94 | * part. 95 | * 96 | * Return 0 if an exact match is found. 97 | * Return <0 if an error occurred. 98 | * Return >0 if no exact match is found, and @path will point to the slot where 99 | * the new key should be inserted into. 
100 | * 101 | * The >0 behavior has several pitfalls: 102 | * 103 | * - It may return an unused slot 104 | * This means path->slots[0] >= btrfs_header_nritems(path->nodes[0]). 105 | * 106 | * - path->slots[0] can be 0 if the tree only has one leaf. 107 | * Otherwise, path->slots[0] will never be zero. 108 | * 109 | * Thus it's recommended to call the btrfs_search_key() and 110 | * btrfs_search_keys_start() wrappers. 111 | */ 112 | int __btrfs_search_slot(struct btrfs_root *root, struct btrfs_path *path, 113 | struct btrfs_key *key); 114 | /* 115 | * Search a single key to find an exact match 116 | * 117 | * Return 0 if an exact match is found and @path will point to the slot. 118 | * Return -ENOENT if no exact match is found. 119 | * Return <0 for error. 120 | */ 121 | int btrfs_search_key(struct btrfs_root *root, struct btrfs_path *path, 122 | struct btrfs_key *key); 123 | 124 | /* 125 | * Start a search for a range of keys 126 | * 127 | * Return 0 if we found any key matching the range, and @path will point 128 | * to the slot. 129 | * The caller then needs to call btrfs_search_keys_next() to continue. 130 | * 131 | * Return -ENOENT if we can't find any key matching the range 132 | * Return <0 for error. 133 | */ 134 | int btrfs_search_keys_start(struct btrfs_root *root, struct btrfs_path *path, 135 | struct btrfs_key_range *range); 136 | 137 | /* 138 | * Continue the search for a range of keys 139 | * 140 | * Return 0 if there is still a key matching the range, and update @path. 141 | * Return >0 if there is no more such key, and @path will still be updated. 142 | * Return <0 for error, and @path will be released. 143 | */ 144 | int btrfs_search_keys_next(struct btrfs_path *path, 145 | struct btrfs_key_range *range); 146 | 147 | #endif 148 | -------------------------------------------------------------------------------- /volumes.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_VOLUMES_H 4 | #define BTRFS_FUSE_VOLUMES_H 5 | 6 | #include "compat.h" 7 | #include "libs/list.h" 8 | #include "libs/rbtree.h" 9 | #include "ondisk_format.h" 10 | 11 | /* 12 | * Here we use ilog2(BTRFS_BLOCK_GROUP_*) to convert the profile bits to 13 | * an index. 14 | * We reserve 0 for BTRFS_RAID_SINGLE, while the lowest profile, ilog2(RAID0), 15 | * is 3, thus we need this shift to make all index numbers sequential. 16 | */ 17 | #define BTRFS_RAID_SHIFT (const_ilog2(BTRFS_BLOCK_GROUP_RAID0) - 1) 18 | 19 | enum btrfs_raid_types { 20 | BTRFS_RAID_SINGLE = 0, 21 | BTRFS_RAID_RAID0 = const_ilog2(BTRFS_BLOCK_GROUP_RAID0 >> BTRFS_RAID_SHIFT), 22 | BTRFS_RAID_RAID1 = const_ilog2(BTRFS_BLOCK_GROUP_RAID1 >> BTRFS_RAID_SHIFT), 23 | BTRFS_RAID_DUP = const_ilog2(BTRFS_BLOCK_GROUP_DUP >> BTRFS_RAID_SHIFT), 24 | BTRFS_RAID_RAID10 = const_ilog2(BTRFS_BLOCK_GROUP_RAID10 >> BTRFS_RAID_SHIFT), 25 | BTRFS_RAID_RAID5 = const_ilog2(BTRFS_BLOCK_GROUP_RAID5 >> BTRFS_RAID_SHIFT), 26 | BTRFS_RAID_RAID6 = const_ilog2(BTRFS_BLOCK_GROUP_RAID6 >> BTRFS_RAID_SHIFT), 27 | BTRFS_RAID_RAID1C3 = const_ilog2(BTRFS_BLOCK_GROUP_RAID1C3 >> BTRFS_RAID_SHIFT), 28 | BTRFS_RAID_RAID1C4 = const_ilog2(BTRFS_BLOCK_GROUP_RAID1C4 >> BTRFS_RAID_SHIFT), 29 | BTRFS_NR_RAID_TYPES 30 | }; 31 | 32 | /* 33 | * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which 34 | * can be used as index to access btrfs_raid_array[]. 
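 *
 * Worked example, assuming the usual on-disk flag values where
 * BTRFS_BLOCK_GROUP_RAID0 is bit 3 and BTRFS_BLOCK_GROUP_RAID1 is bit 4:
 * BTRFS_RAID_SHIFT = const_ilog2(1 << 3) - 1 = 2, so RAID1 converts to
 * ilog2((1 << 4) >> 2) = ilog2(4) = 2 = BTRFS_RAID_RAID1.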
35 | */ 36 | static inline enum btrfs_raid_types __attribute_const__ 37 | btrfs_bg_flags_to_raid_index(u64 flags) 38 | { 39 | u64 profile = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; 40 | 41 | if (!profile) 42 | return BTRFS_RAID_SINGLE; 43 | 44 | return ilog2(profile >> BTRFS_RAID_SHIFT); 45 | } 46 | 47 | /* 48 | * Describe a single device which has a btrfs super block. 49 | * 50 | * All involved devices need to be scanned so btrfs can assemble all its 51 | * devices belonging to one fs. 52 | */ 53 | struct btrfs_device { 54 | struct list_head list; 55 | struct btrfs_fs_info *fs_info; 56 | 57 | u64 devid; 58 | u8 uuid[BTRFS_UUID_SIZE]; 59 | 60 | int fd; 61 | char *path; 62 | }; 63 | 64 | 65 | /* Describe all devices belonging to one btrfs filesystem. */ 66 | struct btrfs_fs_devices { 67 | /* TODO: Find a better way to put seed devices into this list */ 68 | struct list_head dev_list; 69 | 70 | /* 71 | * We can have multiple btrfs specified, thus we need to record them 72 | * all. 73 | */ 74 | struct list_head fs_list; 75 | 76 | u8 fsid[BTRFS_UUID_SIZE]; 77 | int num_devices; 78 | 79 | }; 80 | 81 | struct btrfs_io_stripe { 82 | struct btrfs_device *dev; 83 | u64 physical; 84 | }; 85 | 86 | struct btrfs_chunk_map { 87 | struct rb_node node; 88 | 89 | u64 logical; 90 | u64 length; 91 | u64 stripe_len; 92 | u64 flags; 93 | u16 sub_stripes; 94 | 95 | int num_stripes; 96 | struct btrfs_io_stripe stripes[]; 97 | }; 98 | 99 | static inline int btrfs_chunk_map_size(int num_stripes) 100 | { 101 | return sizeof(struct btrfs_chunk_map) + 102 | num_stripes * sizeof(struct btrfs_io_stripe); 103 | } 104 | 105 | /* 106 | * This is for each profile to provide its own read function. 107 | * 108 | * Return the number of bytes read. For striped profiles (RAID0/RAID10/RAID56) 109 | * we will read at most one stripe at a time, thus the caller must do the read 110 | * in a loop to fill all the data. 111 | * 112 | * Return <0 for error. 113 | */ 114 | typedef int (*btrfs_raid_read_t)(struct btrfs_fs_info *fs_info, 115 | struct btrfs_chunk_map *map, char *buf, 116 | size_t size, u64 logical, int mirror_nr); 117 | 118 | struct btrfs_raid_attr { 119 | int max_mirror; 120 | btrfs_raid_read_t read_func; 121 | }; 122 | 123 | extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES]; 124 | 125 | /* 126 | * Try to scan one device for btrfs. 127 | * 128 | * Return 0 if it's a btrfs and @sb will be populated. 129 | * Return <0 if it's not a btrfs. 130 | */ 131 | int btrfs_scan_device(const char *path, struct btrfs_super_block *sb); 132 | 133 | /* 134 | * Open all devices belonging to the fs with @fsid 135 | * 136 | * At this stage, @fs_info should be pretty empty with just superblock and 137 | * fsid populated. 138 | */ 139 | struct btrfs_fs_devices *btrfs_open_devices(struct btrfs_fs_info *fs_info); 140 | 141 | int btrfs_read_sys_chunk_array(struct btrfs_fs_info *fs_info); 142 | int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); 143 | 144 | /* 145 | * Return >0 for the max mirror number of the chunk containing @logical. 146 | * Return <0 for error. 147 | */ 148 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical); 149 | 150 | /* 151 | * Read from logical bytenr @logical with @mirror_nr as mirror number. 152 | * 153 | * This doesn't do any validation, neither data checksum nor metadata checksum. 154 | * 155 | * Return the number of bytes read from @logical. 156 | * Return <0 for error. 
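 *
 * A typical caller retries across mirrors until one read succeeds; a
 * minimal sketch (error handling trimmed, mirror numbers assumed to
 * start from 1):
 *
 *	int copies = btrfs_num_copies(fs_info, logical);
 *	int ret = -EIO;
 *	int mirror;
 *
 *	for (mirror = 1; mirror <= copies; mirror++) {
 *		ret = btrfs_read_logical(fs_info, buf, size, logical, mirror);
 *		if (ret > 0)
 *			break;
 *	}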
157 | */ 158 | int btrfs_read_logical(struct btrfs_fs_info *fs_info, char *buf, size_t size, 159 | u64 logical, int mirror_nr); 160 | 161 | /* The equivalent of btrfs_cleanup_fs_uuid() of kernel */ 162 | void btrfs_exit(void); 163 | 164 | #endif 165 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | btrfs-fuse 2 | ========== 3 | 4 | About 5 | ----- 6 | 7 | This is a read-only btrfs implementation using FUSE (Filesystem in Userspace). 8 | 9 | Although btrfs is already in the mainline Linux kernel, there are still use-cases 10 | for such a read-only btrfs implementation: 11 | 12 | * Educational purpose 13 | 14 | Give new developers a quick way to learn how a complex and modern 15 | filesystem works. 16 | 17 | * For certain bootloaders 18 | 19 | Certain bootloaders need a code base compatible with their licenses. 20 | 21 | * As a last resort method for subpage/multipage support 22 | 23 | Currently (v5.16-rc) the Linux kernel only supports sectorsize == pagesize, plus 24 | 4K sectorsize with 64K page size. 25 | 26 | Thus this project can act as a last resort method to read data from filesystems 27 | with unsupported sectorsize. 28 | 29 | 30 | Build 31 | ----- 32 | 33 | This project uses the meson build system. 34 | 35 | ``` 36 | $ cd btrfs-fuse 37 | $ meson setup build 38 | $ cd build 39 | $ ninja 40 | ``` 41 | 42 | This project has the following dependencies: 43 | 44 | - uuid 45 | 46 | For uuid parsing 47 | 48 | - libb2 49 | 50 | For BLAKE2 checksum support 51 | 52 | - libcrypto 53 | 54 | For SHA256 checksum support 55 | 56 | - libxxhash 57 | 58 | For XXHASH checksum support 59 | 60 | - zlib 61 | 62 | For zlib decompression support 63 | 64 | - lzo2 65 | 66 | For lzo decompression support 67 | 68 | - libzstd 69 | 70 | For zstd decompression support 71 | 72 | - fuse3 73 | 74 | For the FUSE interface. 75 | 76 | 77 | There are some extra dependencies for the self-test tools: 78 | 79 | - xfsprogs 80 | - btrfs-progs 81 | - aio (optional) 82 | - liburing (optional) 83 | 84 | The above dependencies are all for the `fsstress` program. 85 | 86 | Limitation 87 | ---------- 88 | 89 | Currently `btrfs-fuse` has the following btrfs features missing: 90 | 91 | - xattr/fattr support 92 | 93 | The above features are still under active development. 94 | 95 | When such a missing feature is hit, `btrfs-fuse` returns -EOPNOTSUPP. 96 | 97 | 98 | There are also some FUSE-related features missing: 99 | 100 | - Proper subvolume inode address space 101 | 102 | This is due to a FUSE limitation: one FUSE mount must have the same `stat::st_dev`. 103 | In-kernel btrfs returns different `stat::st_dev` for different subvolumes, 104 | but in FUSE we don't have the ability to do the same thing. 105 | 106 | 107 | Usage 108 | ----- 109 | 110 | ``` 111 | $ btrfs-fuse [<options>] <device> [<device> ...] <mount point> 112 | ``` 113 | 114 | Please note that, if multiple devices are passed into `btrfs-fuse` and contain 115 | different filesystems, `btrfs-fuse` will use the last device to initialize the 116 | mount. 117 | 118 | That's to say, for the following example: 119 | 120 | ``` 121 | $ mkfs.btrfs -f /dev/test/scratch1 122 | $ mkfs.btrfs -f /dev/test/scratch2 123 | $ btrfs-fuse /dev/test/scratch1 /dev/test/scratch2 /tmp/mount 124 | ``` 125 | 126 | Then only the btrfs on `/dev/test/scratch2` will be mounted onto `/tmp/mount`. 127 | 128 | Selftest 129 | -------- 130 | 131 | `btrfs-fuse` has a self-test script in `tests/test.sh`. 
132 | 133 | Usage: 134 | 135 | ``` 136 | # ./tests/test.sh <build dir> 137 | ``` 138 | 139 | The test case will utilize `fsstress` to generate a complex enough fs with the 140 | btrfs kernel module, and then use `fssum` to generate a summary of the whole fs. 141 | Thus it needs root privileges. 142 | 143 | Then mount using `btrfs-fuse`, and compare the filesystem content against the 144 | summary. 145 | 146 | For profiles with duplication, the test case will also try to remove device(s) 147 | and manually corrupt device(s), making sure the content is still correct. 148 | 149 | Regular dm/dm-raid can handle missing devices, but since they don't have 150 | checksums, they are not able to handle corrupted devices. 151 | 152 | License 153 | ------- 154 | 155 | All files in the root directory are under the MIT license. 156 | 157 | Files under the `libs` and `tests` directories are under their own licenses, 158 | mostly GPL-2.0+ or GPL-2.0-only. 159 | 160 | Those external libs include: 161 | 162 | - crc32c.[ch] 163 | 164 | For CRC32C checksum support. 165 | 166 | Cross-ported from btrfs-progs, which is cross-ported from an older kernel, which 167 | is still under the GPL-2.0+ license. 168 | 169 | - list.h 170 | 171 | For kernel style list implementation. 172 | 173 | Cross-ported from btrfs-progs, which is cross-ported from kernel, and under 174 | GPL-2.0-only license. 175 | 176 | - rbtree.[ch] and rbtree_augmented.h 177 | 178 | For kernel style rb-tree implementation. 179 | 180 | Cross-ported from btrfs-progs, which is cross-ported from kernel, and under 181 | GPL-2.0+ license. 182 | 183 | - raid56.[ch] and tables.c 184 | 185 | For RAID56 rebuild. 186 | 187 | Cross-ported from btrfs-progs, which is cross-ported from kernel, and under 188 | GPL-2.0-only license. 189 | 190 | - fsstress.c 191 | 192 | For populating the test mount point. 193 | 194 | Cross-ported from fstests, which is cross-ported from LTP, and under 195 | GPL-2.0-only license. 196 | 197 | - fssum.c and md5.[ch] 198 | 199 | For verifying the content of the test filesystem. 200 | 201 | Cross-ported from fstests, under GPL-2.0-only license. 202 | 203 | For projects which want to have btrfs read-only support, and already have a 204 | FUSE-like interface (like GRUB), those files should not be cross-ported to the 205 | project, as the above licenses are not compatible with the target project. 206 | 207 | Instead either use wrappers around the interfaces provided by the target 208 | project, or start from scratch and follow the license of the target project. 
209 | -------------------------------------------------------------------------------- /compression.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #include <errno.h> 4 | #include <string.h> 5 | #include <zlib.h> 6 | #include <zstd.h> 7 | #include <lzo/lzo1x.h> 8 | #include "compression.h" 9 | #include "messages.h" 10 | 11 | static int decompress_zlib(char *input, u32 input_len, char *output, 12 | u32 output_len) 13 | { 14 | z_stream strm; 15 | int ret; 16 | 17 | memset(&strm, 0, sizeof(strm)); 18 | ret = inflateInit(&strm); 19 | if (ret != Z_OK) { 20 | error("zlib init failed: %d", ret); 21 | return -EIO; 22 | } 23 | 24 | strm.avail_in = input_len; 25 | strm.next_in = (unsigned char *)input; 26 | strm.avail_out = output_len; 27 | strm.next_out = (unsigned char *)output; 28 | ret = inflate(&strm, Z_NO_FLUSH); 29 | inflateEnd(&strm); 30 | if (ret != Z_STREAM_END) { 31 | error("zlib inflate failed: %d", ret); 32 | return -EIO; 33 | } 34 | return 0; 35 | } 36 | 37 | static int decompress_zstd(char *input, u32 input_len, char *output, 38 | u32 output_len) 39 | { 40 | ZSTD_DStream *strm; 41 | ZSTD_inBuffer in = { 42 | .src = input, 43 | .size = input_len, 44 | .pos = 0, 45 | }; 46 | ZSTD_outBuffer out = { 47 | .dst = output, 48 | .size = output_len, 49 | .pos = 0, 50 | }; 51 | size_t zret; 52 | int ret = 0; 53 | 54 | strm = ZSTD_createDStream(); 55 | if (!strm) { 56 | error("failed to alloc zstd"); 57 | return -ENOMEM; 58 | } 59 | 60 | zret = ZSTD_initDStream(strm); 61 | if (ZSTD_isError(zret)) { 62 | error("zstd init failed: %s", ZSTD_getErrorName(zret)); 63 | ret = -EIO; 64 | goto out; 65 | } 66 | 67 | zret = ZSTD_decompressStream(strm, &out, &in); 68 | if (ZSTD_isError(zret)) { 69 | error("zstd decompress failed: %s", ZSTD_getErrorName(zret)); 70 | ret = -EIO; 71 | goto out; 72 | } 73 | if (zret != 0) { 74 | error("zstd frame incomplete"); 75 | ret = -EIO; 76 | } 77 | out: 78 | ZSTD_freeDStream(strm); 79 | return ret; 80 | } 81 | 82 | #define LZO_LEN (4) 83 | 84 | static inline u32 read_compress_length(const char *buf) 85 | { 86 | __le32 dlen; 87 | 88 | memcpy(&dlen, buf, LZO_LEN); 89 | return le32_to_cpu(dlen); 90 | } 91 | 92 | /* Worst lzo compressed size */ 93 | static inline u32 lzo1x_worst_compress(u32 size) 94 | { 95 | return (size + size / 16) + 64 + 3 + 2; 96 | } 97 | 98 | /* 99 | * Unlike zlib/zstd, lzo doesn't have its own embedded stream format, thus 100 | * it relies on btrfs-defined segment headers: 101 | * 102 | * 1. Header 103 | * Fixed size. LZO_LEN (4) bytes long, LE32. 104 | * Records the total size (including the header) of compressed data. 105 | * 106 | * 2. Segment(s) 107 | * Variable size. Each segment includes one segment header, followed by data 108 | * payload. 109 | * One regular LZO compressed extent can have one or more segments. 110 | * For inlined LZO compressed extent, only one segment is allowed. 111 | * One segment represents at most one sector of uncompressed data. 112 | * 113 | * 2.1 Segment header 114 | * Fixed size. LZO_LEN (4) bytes long, LE32. 115 | * Records the total size of the segment (not including the header). 116 | * Segment header never crosses sector boundary, thus it's possible to 117 | * have at most 3 padding zeros at the end of the sector. 118 | * 119 | * 2.2 Data Payload 120 | * Variable size. The upper size limit should be lzo1x_worst_compress(sectorsize) 121 | * which is 4419 for a 4KiB sectorsize. 
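 *
 * Worked read of one segment (offsets illustrative): the header at
 * offset 0 gives the total compressed size; the first SegHdr sits at
 * offset 4 and gives seg_len; its payload occupies [8, 8 + seg_len);
 * the next SegHdr follows right after the payload, unless fewer than
 * LZO_LEN bytes remain in the current sector, in which case the
 * remainder is padding and the SegHdr starts at the next sector.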
122 | * 123 | * Example with 4K sectorsize: 124 | * Page 1: 125 | * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10 126 | * 0x0000 | Header | SegHdr 01 | Data payload 01 ... | 127 | * ... 128 | * 0x0ff0 | SegHdr N | Data payload N ... |00| 129 | * ^^ padding zeros 130 | * Page 2: 131 | * 0x1000 | SegHdr N+1| Data payload N+1 ... | 132 | */ 133 | static int decompress_lzo(const struct btrfs_fs_info *fs_info, char *input, 134 | u32 input_len, char *output, u32 output_len) 135 | { 136 | const u32 sectorsize = fs_info->sectorsize; 137 | int ret = 0; 138 | u32 len_in; 139 | u32 cur_in = 0; /* Current offset inside @input */ 140 | u32 cur_out = 0; /* Current offset inside @output */ 141 | 142 | len_in = read_compress_length(input); 143 | cur_in += LZO_LEN; 144 | 145 | /* Basic lzo header checks */ 146 | if (len_in > MIN(BTRFS_MAX_COMPRESSED, input_len) || 147 | round_up(input_len, sectorsize) < input_len) { 148 | error("invalid lzo header, lzo len %u compressed len %u", 149 | len_in, input_len); 150 | return -EUCLEAN; 151 | } 152 | 153 | while (cur_in < input_len) { 154 | u32 seg_len; /* length of the compressed segment */ 155 | u32 sector_bytes_left; 156 | unsigned long out_len = lzo1x_worst_compress(sectorsize); 157 | 158 | /* 159 | * We should always have enough space for one segment header 160 | * inside the current sector. 161 | */ 162 | ASSERT(cur_in / sectorsize == 163 | (cur_in + LZO_LEN - 1) / sectorsize); 164 | seg_len = read_compress_length(input + cur_in); 165 | cur_in += LZO_LEN; 166 | 167 | 168 | ret = lzo1x_decompress_safe((unsigned char *)input + cur_in, 169 | seg_len, (unsigned char *)output + cur_out, 170 | &out_len, NULL); 171 | if (ret != LZO_E_OK) { 172 | error("lzo decompress failed: %d", ret); 173 | ret = -EIO; 174 | return ret; 175 | } 176 | cur_out += out_len; 177 | cur_in += seg_len; 178 | 179 | sector_bytes_left = sectorsize - (cur_in % sectorsize); 180 | if (sector_bytes_left >= LZO_LEN) 181 | continue; 182 | 183 | /* Skip the padding zeros */ 184 | cur_in += sector_bytes_left; 185 | } 186 | if (!ret) 187 | memset(output + cur_out, 0, output_len - cur_out); 188 | return 0; 189 | } 190 | 191 | int btrfs_decompress(const struct btrfs_fs_info *fs_info, 192 | char *input, u32 input_len, 193 | char *output, u32 output_len, u8 compression) 194 | { 195 | switch (compression) { 196 | case BTRFS_COMPRESS_ZLIB: 197 | return decompress_zlib(input, input_len, output, output_len); 198 | case BTRFS_COMPRESS_LZO: 199 | return decompress_lzo(fs_info, input, input_len, output, 200 | output_len); 201 | case BTRFS_COMPRESS_ZSTD: 202 | return decompress_zstd(input, input_len, output, output_len); 203 | } 204 | 205 | error("invalid compression algorithm: %d", compression); 206 | return -EUCLEAN; 207 | } 208 | -------------------------------------------------------------------------------- /compat.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #ifndef BTRFS_FUSE_COMPAT_H 4 | #define BTRFS_FUSE_COMPAT_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | /* Compatibility layer providing various kernel-like interfaces */ 17 | 18 | #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) 19 | 20 | #define container_of(ptr, type, member) ({ \ 21 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 22 | (type *)( (char *)__mptr - offsetof(type,member) );}) 23 | 24 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 25 | 26 | #define 
__round_mask(x, y) ((__typeof__(x))((y)-1)) 27 | #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) 28 | #define round_down(x, y) ((x) & ~__round_mask(x, y)) 29 | 30 | /* Basic width dependent types */ 31 | typedef __u32 u32; 32 | typedef __u64 u64; 33 | typedef __u16 u16; 34 | typedef __u8 u8; 35 | typedef __s64 s64; 36 | typedef __s32 s32; 37 | 38 | #define ASSERT(c) assert(c) 39 | 40 | #ifndef SECTOR_SHIFT 41 | #define SECTOR_SHIFT (9) 42 | #endif 43 | 44 | /* Error pointer conversion */ 45 | #define MAX_ERRNO 4095 46 | #define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) 47 | 48 | /* Compiler check related macros */ 49 | #ifdef __CHECKER__ 50 | #define __force __attribute__((force)) 51 | #ifndef __bitwise__ 52 | #define __bitwise__ __attribute__((bitwise)) 53 | #endif /* __bitwise__ */ 54 | #define __must_check __attribute__((__warn_unused_result__)) 55 | #else 56 | #define __force 57 | #ifndef __bitwise__ 58 | #define __bitwise__ 59 | #endif /* __bitwise__ */ 60 | #define __must_check 61 | #endif /* __CHECKER__ */ 62 | 63 | static inline void * __must_check ERR_PTR(long error) 64 | { 65 | return (void *) error; 66 | } 67 | 68 | static inline long __must_check PTR_ERR(const void *ptr) 69 | { 70 | return (long) ptr; 71 | } 72 | 73 | static inline bool __must_check IS_ERR(const void *ptr) 74 | { 75 | return IS_ERR_VALUE((unsigned long)ptr); 76 | } 77 | 78 | static inline void * __must_check ERR_CAST(__force const void *ptr) 79 | { 80 | return (void *) ptr; 81 | } 82 | 83 | #define le8_to_cpu(v) (v) 84 | #define cpu_to_le8(v) (v) 85 | #define __le8 u8 86 | 87 | #if __BYTE_ORDER == __BIG_ENDIAN 88 | #define cpu_to_le64(x) ((__force __le64)(u64)(bswap_64(x))) 89 | #define le64_to_cpu(x) ((__force u64)(__le64)(bswap_64(x))) 90 | #define cpu_to_le32(x) ((__force __le32)(u32)(bswap_32(x))) 91 | #define le32_to_cpu(x) ((__force u32)(__le32)(bswap_32(x))) 92 | #define cpu_to_le16(x) ((__force __le16)(u16)(bswap_16(x))) 93 | #define le16_to_cpu(x) ((__force u16)(__le16)(bswap_16(x))) 94 | #else 95 | #define cpu_to_le64(x) ((__force __le64)(u64)(x)) 96 | #define le64_to_cpu(x) ((__force u64)(__le64)(x)) 97 | #define cpu_to_le32(x) ((__force __le32)(u32)(x)) 98 | #define le32_to_cpu(x) ((__force u32)(__le32)(x)) 99 | #define cpu_to_le16(x) ((__force __le16)(u16)(x)) 100 | #define le16_to_cpu(x) ((__force u16)(__le16)(x)) 101 | #endif /* __BYTE_ORDER == __BIG_ENDIAN */ 102 | 103 | struct __una_u16 { __le16 x; } __attribute__((__packed__)); 104 | struct __una_u32 { __le32 x; } __attribute__((__packed__)); 105 | struct __una_u64 { __le64 x; } __attribute__((__packed__)); 106 | 107 | #define get_unaligned_le8(p) (*((u8 *)(p))) 108 | #define get_unaligned_8(p) (*((u8 *)(p))) 109 | #define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val)) 110 | #define put_unaligned_8(val,p) ((*((u8 *)(p))) = (val)) 111 | #define get_unaligned_le16(p) le16_to_cpu(((const struct __una_u16 *)(p))->x) 112 | #define get_unaligned_16(p) (((const struct __una_u16 *)(p))->x) 113 | #define put_unaligned_le16(val,p) (((struct __una_u16 *)(p))->x = cpu_to_le16(val)) 114 | #define put_unaligned_16(val,p) (((struct __una_u16 *)(p))->x = (val)) 115 | #define get_unaligned_le32(p) le32_to_cpu(((const struct __una_u32 *)(p))->x) 116 | #define get_unaligned_32(p) (((const struct __una_u32 *)(p))->x) 117 | #define put_unaligned_le32(val,p) (((struct __una_u32 *)(p))->x = cpu_to_le32(val)) 118 | #define put_unaligned_32(val,p) (((struct __una_u32 *)(p))->x = (val)) 119 | #define get_unaligned_le64(p) le64_to_cpu(((const struct 
__una_u64 *)(p))->x) 120 | #define get_unaligned_64(p) (((const struct __una_u64 *)(p))->x) 121 | #define put_unaligned_le64(val,p) (((struct __una_u64 *)(p))->x = cpu_to_le64(val)) 122 | #define put_unaligned_64(val,p) (((struct __una_u64 *)(p))->x = (val)) 123 | 124 | /** 125 | * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value 126 | * @n: parameter 127 | * 128 | * Use this where sparse expects a true constant expression, e.g. for array 129 | * indices. 130 | */ 131 | #define const_ilog2(n) \ 132 | ( \ 133 | __builtin_constant_p(n) ? ( \ 134 | (n) < 2 ? 0 : \ 135 | (n) & (1ULL << 63) ? 63 : \ 136 | (n) & (1ULL << 62) ? 62 : \ 137 | (n) & (1ULL << 61) ? 61 : \ 138 | (n) & (1ULL << 60) ? 60 : \ 139 | (n) & (1ULL << 59) ? 59 : \ 140 | (n) & (1ULL << 58) ? 58 : \ 141 | (n) & (1ULL << 57) ? 57 : \ 142 | (n) & (1ULL << 56) ? 56 : \ 143 | (n) & (1ULL << 55) ? 55 : \ 144 | (n) & (1ULL << 54) ? 54 : \ 145 | (n) & (1ULL << 53) ? 53 : \ 146 | (n) & (1ULL << 52) ? 52 : \ 147 | (n) & (1ULL << 51) ? 51 : \ 148 | (n) & (1ULL << 50) ? 50 : \ 149 | (n) & (1ULL << 49) ? 49 : \ 150 | (n) & (1ULL << 48) ? 48 : \ 151 | (n) & (1ULL << 47) ? 47 : \ 152 | (n) & (1ULL << 46) ? 46 : \ 153 | (n) & (1ULL << 45) ? 45 : \ 154 | (n) & (1ULL << 44) ? 44 : \ 155 | (n) & (1ULL << 43) ? 43 : \ 156 | (n) & (1ULL << 42) ? 42 : \ 157 | (n) & (1ULL << 41) ? 41 : \ 158 | (n) & (1ULL << 40) ? 40 : \ 159 | (n) & (1ULL << 39) ? 39 : \ 160 | (n) & (1ULL << 38) ? 38 : \ 161 | (n) & (1ULL << 37) ? 37 : \ 162 | (n) & (1ULL << 36) ? 36 : \ 163 | (n) & (1ULL << 35) ? 35 : \ 164 | (n) & (1ULL << 34) ? 34 : \ 165 | (n) & (1ULL << 33) ? 33 : \ 166 | (n) & (1ULL << 32) ? 32 : \ 167 | (n) & (1ULL << 31) ? 31 : \ 168 | (n) & (1ULL << 30) ? 30 : \ 169 | (n) & (1ULL << 29) ? 29 : \ 170 | (n) & (1ULL << 28) ? 28 : \ 171 | (n) & (1ULL << 27) ? 27 : \ 172 | (n) & (1ULL << 26) ? 26 : \ 173 | (n) & (1ULL << 25) ? 25 : \ 174 | (n) & (1ULL << 24) ? 24 : \ 175 | (n) & (1ULL << 23) ? 23 : \ 176 | (n) & (1ULL << 22) ? 22 : \ 177 | (n) & (1ULL << 21) ? 21 : \ 178 | (n) & (1ULL << 20) ? 20 : \ 179 | (n) & (1ULL << 19) ? 19 : \ 180 | (n) & (1ULL << 18) ? 18 : \ 181 | (n) & (1ULL << 17) ? 17 : \ 182 | (n) & (1ULL << 16) ? 16 : \ 183 | (n) & (1ULL << 15) ? 15 : \ 184 | (n) & (1ULL << 14) ? 14 : \ 185 | (n) & (1ULL << 13) ? 13 : \ 186 | (n) & (1ULL << 12) ? 12 : \ 187 | (n) & (1ULL << 11) ? 11 : \ 188 | (n) & (1ULL << 10) ? 10 : \ 189 | (n) & (1ULL << 9) ? 9 : \ 190 | (n) & (1ULL << 8) ? 8 : \ 191 | (n) & (1ULL << 7) ? 7 : \ 192 | (n) & (1ULL << 6) ? 6 : \ 193 | (n) & (1ULL << 5) ? 5 : \ 194 | (n) & (1ULL << 4) ? 4 : \ 195 | (n) & (1ULL << 3) ? 3 : \ 196 | (n) & (1ULL << 2) ? 
2 : \ 197 | 1) : \ 198 | -1) 199 | 200 | static inline int ilog2(u64 num) 201 | { 202 | int l = 0; 203 | 204 | num >>= 1; 205 | while (num) { 206 | l++; 207 | num >>= 1; 208 | } 209 | 210 | return l; 211 | } 212 | 213 | static inline int is_power_of_2(unsigned long n) 214 | { 215 | return (n != 0 && ((n & (n - 1)) == 0)); 216 | } 217 | 218 | #endif 219 | -------------------------------------------------------------------------------- /tests/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | build_dir=$1 4 | 5 | usage() 6 | { 7 | echo "usage:" 8 | echo " $0: <build dir>" 9 | exit 1 10 | } 11 | 12 | if [ -z "$build_dir" ]; then 13 | usage 14 | fi 15 | 16 | 17 | if [ $(whoami) != "root" ]; then 18 | echo "need root privilege" 19 | exit 1; 20 | fi 21 | 22 | declare -a devs 23 | for (( i = 0; i < 4; i++ )); do 24 | file="${build_dir}/dev${i}" 25 | truncate -s 1G "$file" 26 | devs[$i]=$(losetup -f --show "$file") 27 | done 28 | 29 | fsstress="$build_dir/fsstress" 30 | fssum="$build_dir/fssum" 31 | fuse="$build_dir/btrfs-fuse" 32 | corrupt="$build_dir/corrupt" 33 | mnt="$build_dir/mnt" 34 | log="$build_dir/test-log" 35 | tmp=$(mktemp --tmpdir btrfs-fuse-tests.XXXXXX) 36 | nr_ops=1024 37 | 38 | rm -rf "$log" 39 | 40 | cleanup() 41 | { 42 | umount "$mnt" &> /dev/null 43 | for ((i = 0; i < 4; i++)); do 44 | losetup -d "${devs[i]}" 45 | done 46 | if [ -f "$tmp.fssum_kernel" ]; then 47 | cp "$tmp.fssum_kernel" "$build_dir/fssum_kernel" 48 | fi 49 | rm -rf -- "$tmp"* 50 | } 51 | 52 | fail() 53 | { 54 | echo "$*" | tee -a "$log" 55 | cleanup 56 | exit 1 57 | } 58 | 59 | require_command() 60 | { 61 | type -p "$1" &> /dev/null 62 | if [ $? -ne 0 ]; then 63 | fail "command '$1' not found" 64 | fi 65 | } 66 | 67 | fssum_generate() 68 | { 69 | dir=$1 70 | 71 | if [ -z "$dir" ]; then 72 | fail "need a path" 73 | fi 74 | 75 | # Don't create snapshots: due to FUSE ino/st_dev limitations, fssum 76 | # has no way to detect snapshot boundaries 77 | "$fsstress" -f snapshot=0 -w -n "$nr_ops" -d "$mnt" >> "$log" ||\ 78 | fail "fsstress failed" 79 | 80 | mount -o ro,remount "$mnt" || fail "remount failed" 81 | 82 | # No XATTR support yet, thus don't take xattr into fssum 83 | "$fssum" -T -f -w "$tmp.fssum_kernel" "$mnt" ||\ 84 | fail "fssum generation failed" 85 | } 86 | 87 | test_default() 88 | { 89 | echo "=== test default mkfs profile ===" | tee -a "$log" 90 | mkfs.btrfs -f "${devs[0]}" > /dev/null 91 | mount "${devs[0]}" "$mnt" || fail "mount failed" 92 | fssum_generate "$mnt" 93 | umount "$mnt" 94 | 95 | "$fuse" "${devs[0]}" "$mnt" || fail "fuse mount failed" 96 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 97 | fusermount -u "$mnt" || fail "fuse unmount failed" 98 | } 99 | 100 | test_raid0() 101 | { 102 | echo "=== test raid0 mkfs profile ===" | tee -a "$log" 103 | mkfs.btrfs -f "${devs[0]}" "${devs[1]}" -m raid0 -d raid0 > /dev/null 104 | mount "${devs[0]}" "$mnt" || fail "mount failed" 105 | fssum_generate "$mnt" 106 | umount "$mnt" 107 | 108 | "$fuse" "${devs[0]}" "${devs[1]}" "$mnt" || fail "fuse mount failed" 109 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 110 | fusermount -u "$mnt" || fail "fuse unmount failed" 111 | } 112 | 113 | test_raid1() 114 | { 115 | echo "=== test raid1 mkfs profile ===" | tee -a "$log" 116 | mkfs.btrfs -f "${devs[0]}" "${devs[1]}" -m raid1 -d raid1 > /dev/null 117 | mount "${devs[0]}" "$mnt" || fail "mount failed" 118 | fssum_generate "$mnt" 119 | umount
"$mnt" 120 | 121 | "$fuse" "${devs[0]}" "${devs[1]}" "$mnt" || fail "fuse mount failed" 122 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 123 | fusermount -u "$mnt" || fail "fuse unmount failed" 124 | 125 | echo "=== test raid1 with one missing dev ===" | tee -a "$log" 126 | "$fuse" "${devs[0]}" "$mnt" >> "$log" || fail "fuse mount failed" 127 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 128 | fusermount -u "$mnt" || fail "fuse unmount failed" 129 | 130 | echo "=== test raid1 with one corrupted dev ===" | tee -a "$log" 131 | "$corrupt" "${devs[0]}" >> "$log" || fail "file corruption failed" 132 | "$fuse" "${devs[0]}" "${devs[1]}" "$mnt" || fail "fuse mount failed" 133 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 134 | fusermount -u "$mnt" || fail "fuse unmount failed" 135 | } 136 | 137 | test_raid10() 138 | { 139 | echo "=== test raid10 mkfs profile ===" | tee -a "$log" 140 | mkfs.btrfs -f "${devs[0]}" "${devs[1]}" "${devs[2]}" "${devs[3]}" \ 141 | -m raid10 -d raid10 > /dev/null 142 | mount "${devs[0]}" "$mnt" || fail "mount failed" 143 | fssum_generate "$mnt" 144 | umount "$mnt" 145 | 146 | "$fuse" "${devs[0]}" "${devs[1]}" "${devs[2]}" "${devs[3]}" "$mnt" >> "$log" ||\ 147 | fail "fuse mount failed" 148 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 149 | fusermount -u "$mnt" || fail "fuse unmount failed" 150 | 151 | # In theory we can handle two missing devices in different sub groups, 152 | # but that requires very strict device rotation during mkfs. 153 | echo "=== test raid10 with one missing devs ===" | tee -a "$log" 154 | "$fuse" "${devs[0]}" "${devs[1]}" "${devs[3]}" "$mnt" >> "$log" || fail "fuse mount failed" 155 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 156 | fusermount -u "$mnt" || fail "fuse unmount failed" 157 | 158 | echo "=== test raid10 with one corrupted dev ===" | tee -a "$log" 159 | "$corrupt" "${devs[0]}" >> "$log" || fail "file corruption failed" 160 | "$fuse" "${devs[0]}" "${devs[1]}" "${devs[2]}" "${devs[3]}" "$mnt" ||\ 161 | fail "fuse mount failed" 162 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 163 | fusermount -u "$mnt" || fail "fuse unmount failed" 164 | } 165 | 166 | test_raid5() 167 | { 168 | echo "=== test raid5 mkfs profile ===" | tee -a "$log" 169 | mkfs.btrfs -f "${devs[0]}" "${devs[1]}" "${devs[2]}" \ 170 | -m raid5 -d raid5 &> /dev/null 171 | mount "${devs[0]}" "$mnt" || fail "mount failed" 172 | fssum_generate "$mnt" 173 | umount "$mnt" 174 | 175 | "$fuse" "${devs[0]}" "${devs[1]}" "${devs[2]}" "$mnt" >> "$log" ||\ 176 | fail "fuse mount failed" 177 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 178 | fusermount -u "$mnt" || fail "fuse unmount failed" 179 | 180 | echo "=== test raid5 with one missing dev ===" | tee -a "$log" 181 | "$fuse" "${devs[0]}" "${devs[1]}" "$mnt" >> "$log" || fail "fuse mount failed" 182 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 183 | fusermount -u "$mnt" || fail "fuse unmount failed" 184 | 185 | echo "=== test raid5 with one corrupted dev ===" | tee -a "$log" 186 | "$corrupt" "${devs[0]}" >> "$log" || fail "file corruption failed" 187 | "$fuse" "${devs[0]}" "${devs[1]}" "${devs[2]}" "$mnt" >> "$log" ||\ 188 | fail "fuse mount failed" 189 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || 
fail "fssum verification failed" 190 | fusermount -u "$mnt" || fail "fuse unmount failed" 191 | } 192 | 193 | test_raid6() 194 | { 195 | echo "=== test raid6 mkfs profile ===" | tee -a "$log" 196 | mkfs.btrfs -f "${devs[0]}" "${devs[1]}" "${devs[2]}" "${devs[3]}"\ 197 | -m raid6 -d raid6 &> /dev/null 198 | mount "${devs[0]}" "$mnt" || fail "mount failed" 199 | fssum_generate "$mnt" 200 | umount "$mnt" 201 | 202 | "$fuse" "${devs[0]}" "${devs[1]}" "${devs[2]}" "${devs[3]}" "$mnt" \ 203 | >> "$log" || fail "fuse mount failed" 204 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 205 | fusermount -u "$mnt" || fail "fuse unmount failed" 206 | 207 | # RAID6 recovery with mixed corruption and missing is not handled well 208 | # in kernel/progs/btrfs-fuse. 209 | # Thus here we only test missing devices case. 210 | 211 | echo "=== test raid6 with two missing dev ===" | tee -a "$log" 212 | "$fuse" "${devs[0]}" "${devs[1]}" "$mnt" >> "$log" || fail "fuse mount failed" 213 | "$fssum" -r "$tmp.fssum_kernel" "$mnt" >> "$log" || fail "fssum verification failed" 214 | fusermount -u "$mnt" || fail "fuse unmount failed" 215 | } 216 | mkdir -p $mnt 217 | 218 | require_command mkfs.btrfs 219 | require_command fusermount 220 | require_command losetup 221 | 222 | test_default 223 | test_raid0 224 | test_raid1 225 | test_raid10 226 | test_raid5 227 | test_raid6 228 | cleanup 229 | -------------------------------------------------------------------------------- /libs/crc32c.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0-or-later 2 | /* 3 | * Copied from the kernel source code, lib/libcrc32c.c. 4 | * 5 | * This program is free software; you can redistribute it and/or modify it 6 | * under the terms of the GNU General Public License as published by the Free 7 | * Software Foundation; either version 2 of the License, or (at your option) 8 | * any later version. 9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "libs/crc32c.h" 20 | 21 | u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length); 22 | static u32 (*crc_function)(u32 crc, unsigned char const *data, size_t length) = __crc32c_le; 23 | 24 | #ifdef __x86_64__ 25 | 26 | /* 27 | * Based on a posting to lkml by Austin Zhang 28 | * 29 | * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. 30 | * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) 31 | * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: 32 | * http://www.intel.com/products/processor/manuals/ 33 | * Intel(R) 64 and IA-32 Architectures Software Developer's Manual 34 | * Volume 2A: Instruction Set Reference, A-M 35 | */ 36 | #if __SIZEOF_LONG__ == 8 37 | #define REX_PRE "0x48, " 38 | #define SCALE_F 8 39 | #else 40 | #define REX_PRE 41 | #define SCALE_F 4 42 | #endif 43 | 44 | static int crc32c_probed = 0; 45 | static int crc32c_intel_available = 0; 46 | 47 | static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data, 48 | unsigned long length) 49 | { 50 | while (length--) { 51 | __asm__ __volatile__( 52 | ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" 53 | :"=S"(crc) 54 | :"0"(crc), "c"(*data) 55 | ); 56 | data++; 57 | } 58 | 59 | return crc; 60 | } 61 | 62 | /* 63 | * Steps through buffer one byte at at time, calculates reflected 64 | * crc using table. 
65 | */ 66 | static uint32_t crc32c_intel(u32 crc, unsigned char const *data, unsigned long length) 67 | { 68 | unsigned int iquotient = length / SCALE_F; 69 | unsigned int iremainder = length % SCALE_F; 70 | unsigned long *ptmp = (unsigned long *)data; 71 | 72 | while (iquotient--) { 73 | __asm__ __volatile__( 74 | ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" 75 | :"=S"(crc) 76 | :"0"(crc), "c"(*ptmp) 77 | ); 78 | ptmp++; 79 | } 80 | 81 | if (iremainder) 82 | crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, 83 | iremainder); 84 | 85 | return crc; 86 | } 87 | 88 | static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, 89 | unsigned int *edx) 90 | { 91 | int id = *eax; 92 | 93 | asm("movl %4, %%eax;" 94 | "cpuid;" 95 | "movl %%eax, %0;" 96 | "movl %%ebx, %1;" 97 | "movl %%ecx, %2;" 98 | "movl %%edx, %3;" 99 | : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx) 100 | : "r" (id) 101 | : "eax", "ebx", "ecx", "edx"); 102 | } 103 | 104 | static void crc32c_intel_probe(void) 105 | { 106 | if (!crc32c_probed) { 107 | unsigned int eax, ebx, ecx, edx; 108 | 109 | eax = 1; 110 | 111 | do_cpuid(&eax, &ebx, &ecx, &edx); 112 | crc32c_intel_available = (ecx & (1 << 20)) != 0; 113 | crc32c_probed = 1; 114 | } 115 | } 116 | 117 | void crc32c_optimization_init(void) 118 | { 119 | crc32c_intel_probe(); 120 | if (crc32c_intel_available) 121 | crc_function = crc32c_intel; 122 | } 123 | #else 124 | 125 | void crc32c_optimization_init(void) 126 | { 127 | } 128 | 129 | #endif /* __x86_64__ */ 130 | 131 | /* 132 | * This is the CRC-32C table 133 | * Generated with: 134 | * width = 32 bits 135 | * poly = 0x1EDC6F41 136 | * reflect input bytes = true 137 | * reflect output bytes = true 138 | */ 139 | 140 | static const u32 crc32c_table[256] = { 141 | 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 142 | 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 143 | 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, 144 | 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, 145 | 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, 146 | 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 147 | 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 148 | 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, 149 | 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, 150 | 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, 151 | 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 152 | 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 153 | 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, 154 | 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, 155 | 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, 156 | 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 157 | 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 158 | 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, 159 | 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, 160 | 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, 161 | 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 162 | 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 163 | 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, 164 | 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, 165 | 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, 166 | 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 167 | 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 168 | 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, 169 | 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, 170 | 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, 171 | 0xFB410CC2L, 
0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 172 | 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 173 | 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, 174 | 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, 175 | 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, 176 | 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 177 | 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 178 | 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, 179 | 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, 180 | 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, 181 | 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 182 | 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 183 | 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, 184 | 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, 185 | 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, 186 | 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 187 | 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 188 | 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, 189 | 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, 190 | 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, 191 | 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 192 | 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 193 | 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, 194 | 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, 195 | 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, 196 | 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 197 | 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 198 | 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, 199 | 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, 200 | 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, 201 | 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 202 | 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 203 | 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL, 204 | 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L 205 | }; 206 | 207 | /* 208 | * Steps through buffer one byte at a time, calculates reflected 209 | * crc using table. 210 | */ 211 | 212 | u32 __crc32c_le(u32 crc, unsigned char const *data, size_t length) 213 | { 214 | while (length--) 215 | crc = 216 | crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); 217 | return crc; 218 | } 219 | 220 | u32 crc32c_le(u32 crc, unsigned char const *data, size_t length) 221 | { 222 | /* Use by-byte access for unaligned buffers */ 223 | if ((unsigned long)data % sizeof(unsigned long)) 224 | return __crc32c_le(crc, data, length); 225 | 226 | return crc_function(crc, data, length); 227 | } 228 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #define FUSE_USE_VERSION 31 4 | 5 | #include <fuse.h> 6 | #include <string.h> 7 | #include "accessors.h" 8 | #include "ctree.h" 9 | #include "messages.h" 10 | #include "super.h" 11 | #include "inode.h" 12 | #include "data.h" 13 | 14 | static struct btrfs_fs_info *global_info = NULL; 15 | 16 | static int btrfs_fuse_statfs(const char *path, struct statvfs *stbuf) 17 | { 18 | ASSERT(global_info); 19 | 20 | stbuf->f_bsize = global_info->sectorsize; 21 | stbuf->f_frsize = global_info->sectorsize; 22 | stbuf->f_blocks = btrfs_super_total_bytes(&global_info->super_copy) / 23 | global_info->sectorsize; 24 | /* 25 | * Btrfs available space calculation is already complex due to dynamic
27 | * Since our implementation is read-only, no need to populate those 28 | * available values. 29 | */ 30 | stbuf->f_bavail = 0; 31 | stbuf->f_bfree = 0; 32 | stbuf->f_favail = 0; 33 | stbuf->f_files = 0; 34 | stbuf->f_namemax = BTRFS_NAME_LEN; 35 | return 0; 36 | } 37 | 38 | static int btrfs_fuse_getattr(const char *path, struct stat *stbuf, 39 | struct fuse_file_info *fi) 40 | { 41 | struct btrfs_fs_info *fs_info = global_info; 42 | struct btrfs_inode inode = {}; 43 | int ret; 44 | 45 | ret = btrfs_resolve_path(fs_info, path, strlen(path), &inode); 46 | if (ret < 0) 47 | return ret; 48 | 49 | ret = btrfs_stat(fs_info, &inode, stbuf); 50 | return ret; 51 | } 52 | 53 | static int btrfs_fuse_read_link(const char *path, char *output, size_t output_len) 54 | { 55 | struct btrfs_fs_info *fs_info = global_info; 56 | struct btrfs_inode inode = {}; 57 | int ret; 58 | 59 | ret = btrfs_resolve_path(fs_info, path, strlen(path), &inode); 60 | if (ret < 0) 61 | return ret; 62 | 63 | if (inode.file_type != BTRFS_FT_SYMLINK) 64 | return -EINVAL; 65 | ret = btrfs_read_link(fs_info, &inode, output, output_len); 66 | if (ret < 0) 67 | return ret; 68 | return 0; 69 | } 70 | 71 | /* Just do basic path resolve and type check */ 72 | static int btrfs_fuse_open(const char *path, struct fuse_file_info *fi) 73 | { 74 | struct btrfs_fs_info *fs_info = global_info; 75 | struct btrfs_inode inode = {}; 76 | int ret; 77 | 78 | ret = btrfs_resolve_path(fs_info, path, strlen(path), &inode); 79 | if (ret < 0) 80 | return ret; 81 | 82 | if (inode.file_type == BTRFS_FT_DIR) 83 | return -EISDIR; 84 | return 0; 85 | } 86 | 87 | static int btrfs_fuse_read(const char *path, char *output, size_t size, 88 | off_t offset, struct fuse_file_info *fi) 89 | { 90 | struct btrfs_fs_info *fs_info = global_info; 91 | struct btrfs_inode inode = {}; 92 | int ret; 93 | 94 | if (!IS_ALIGNED(offset, fs_info->sectorsize) || 95 | !IS_ALIGNED(size, fs_info->sectorsize)) { 96 | error("unaligned read range, size=%zu offset=%tu path=%s", 97 | size, offset, path); 98 | return -EINVAL; 99 | } 100 | 101 | ret = btrfs_resolve_path(fs_info, path, strlen(path), &inode); 102 | if (ret < 0) 103 | return ret; 104 | 105 | if (inode.file_type == BTRFS_FT_DIR) 106 | return -EISDIR; 107 | 108 | return btrfs_read_file(fs_info, &inode, offset, output, size); 109 | } 110 | 111 | static int btrfs_fuse_release(const char *path, struct fuse_file_info *fi) 112 | { 113 | return 0; 114 | } 115 | 116 | static void *btrfs_fuse_init(struct fuse_conn_info *conn, 117 | struct fuse_config *cfg) 118 | { 119 | cfg->use_ino = 1; 120 | cfg->intr = 1; 121 | cfg->nullpath_ok = 0; 122 | return NULL; 123 | } 124 | 125 | static int btrfs_fuse_opendir(const char *path, struct fuse_file_info *fi) 126 | { 127 | struct btrfs_fs_info *fs_info = global_info; 128 | struct btrfs_inode inode = {}; 129 | int ret; 130 | 131 | ret = btrfs_resolve_path(fs_info, path, strlen(path), &inode); 132 | if (ret < 0) 133 | return ret; 134 | 135 | if (inode.file_type != BTRFS_FT_DIR) 136 | return -ENOTDIR; 137 | return 0; 138 | } 139 | 140 | static int btrfs_fuse_readdir(const char *path, void *buf, 141 | fuse_fill_dir_t filler, off_t offset, 142 | struct fuse_file_info *fi, 143 | enum fuse_readdir_flags flags) 144 | { 145 | struct btrfs_fs_info *fs_info = global_info; 146 | struct btrfs_iterate_dir_ctrl ctrl = {}; 147 | struct btrfs_inode dir = {}; 148 | int ret; 149 | 150 | ret = btrfs_resolve_path(fs_info, path, strlen(path), &dir); 151 | if (ret < 0) 152 | return ret; 153 | 154 | if (dir.file_type != 
BTRFS_FT_DIR) 155 | return -ENOTDIR; 156 | 157 | /* 158 | * The @offset is the index of the last entry we returned, so we 159 | * should start from the next one. 160 | */ 161 | ret = btrfs_iterate_dir_start(fs_info, &ctrl, &dir, offset + 1); 162 | if (ret < 0) 163 | return ret; 164 | 165 | while (ret == 0) { 166 | u64 found_index; 167 | char name_buf[BTRFS_NAME_LEN + 1] = {}; 168 | size_t name_len; 169 | struct btrfs_inode entry = {}; 170 | struct stat st = {}; 171 | 172 | ret = btrfs_iterate_dir_get_inode(fs_info, &ctrl, &entry, 173 | &found_index, name_buf, &name_len); 174 | if (ret < 0) 175 | break; 176 | 177 | st.st_ino = entry.ino; 178 | st.st_mode = btrfs_type_to_imode(entry.file_type); 179 | if (filler(buf, name_buf, &st, found_index, 0)) 180 | break; 181 | ret = btrfs_iterate_dir_next(fs_info, &ctrl); 182 | } 183 | btrfs_iterate_dir_end(fs_info, &ctrl); 184 | if (ret > 0) 185 | ret = 0; 186 | return ret; 187 | } 188 | 189 | static void btrfs_fuse_destroy(void *private_data) 190 | { 191 | struct btrfs_fs_info *fs_info = global_info; 192 | 193 | global_info = NULL; 194 | btrfs_unmount(fs_info); 195 | btrfs_exit(); 196 | } 197 | 198 | static const struct fuse_operations btrfs_fuse_ops = { 199 | .statfs = btrfs_fuse_statfs, 200 | .getattr = btrfs_fuse_getattr, 201 | .readlink = btrfs_fuse_read_link, 202 | .open = btrfs_fuse_open, 203 | .read = btrfs_fuse_read, 204 | .release = btrfs_fuse_release, 205 | .opendir = btrfs_fuse_opendir, 206 | .readdir = btrfs_fuse_readdir, 207 | .init = btrfs_fuse_init, 208 | .destroy = btrfs_fuse_destroy, 209 | }; 210 | 211 | void usage(void) 212 | { 213 | fprintf(stderr, "usage: btrfs-fuse [<options>] <device> [<device> ...] <mountpoint>\n"); 214 | } 215 | 216 | int main(int argc, char *argv[]) 217 | { 218 | enum { MAX_ARGS = 32 }; 219 | struct btrfs_fs_info *fs_info; 220 | int nargc = 0; 221 | char *nargv[MAX_ARGS] = {}; 222 | char *paras[2] = {}; 223 | int i; 224 | 225 | /* 226 | * We pass all parameters to fuse directly, but we want to scan btrfs 227 | * on all parameters except the last one. 228 | */ 229 | for (i = 0; i < argc && nargc < MAX_ARGS; i++) { 230 | int ret; 231 | 232 | if (i == 0) 233 | goto pass; 234 | 235 | if (argv[i][0] == '-') 236 | goto pass; 237 | 238 | /* 239 | * This parameter can be a device or a mount point. 240 | * 241 | * If it's the last parameter, it will be added to nargv[] 242 | * after the loop. 243 | * So we don't need to pass current parameter to fuse. 244 | */ 245 | paras[1] = paras[0]; 246 | paras[0] = argv[i]; 247 | if (!paras[1]) 248 | continue; 249 | /* 250 | * paras[1] is definitely not the last parameter, 251 | * thus it should be a btrfs device. 252 | * 253 | * Do the device scan and don't pass it to fuse. 254 | * Fuse only needs to handle all options and mount point.
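 * E.g. with a hypothetical invocation "btrfs-fuse -f /dev/loop0 /dev/loop1 /mnt",
 * "-f" is passed through to FUSE, /dev/loop0 and /dev/loop1 are scanned as
 * btrfs devices, and /mnt is appended back as the mount point after the loop.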
255 | */ 256 | ret = btrfs_scan_device(paras[1], NULL); 257 | if (ret < 0) { 258 | error("failed to scan device %s: %d", paras[1], ret); 259 | btrfs_exit(); 260 | return 1; 261 | } 262 | continue; 263 | pass: 264 | nargv[nargc] = argv[i]; 265 | nargc++; 266 | } 267 | if (paras[0]) { 268 | nargv[nargc] = paras[0]; 269 | nargc++; 270 | } else { 271 | usage(); 272 | } 273 | 274 | if (nargc + 1 >= MAX_ARGS) { 275 | error("too many args for FUSE, max supported args is %u", MAX_ARGS); 276 | return 1; 277 | } 278 | 279 | if (paras[1]) { 280 | fs_info = btrfs_mount(paras[1]); 281 | if (IS_ERR(fs_info)) { 282 | error("failed to open btrfs on device %s", paras[1]); 283 | btrfs_exit(); 284 | return 1; 285 | } 286 | global_info = fs_info; 287 | } 288 | 289 | /* Either run FUSE or let FUSE handle "--help" output */ 290 | return fuse_main(nargc, nargv, &btrfs_fuse_ops, NULL); 291 | } 292 | -------------------------------------------------------------------------------- /libs/rbtree_augmented.h: -------------------------------------------------------------------------------- 1 | /* 2 | Red Black Trees 3 | (C) 1999 Andrea Arcangeli 4 | (C) 2002 David Woodhouse 5 | (C) 2012 Michel Lespinasse 6 | 7 | This program is free software; you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation; either version 2 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program; if not, write to the Free Software 19 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | 21 | linux/include/linux/rbtree_augmented.h 22 | */ 23 | 24 | #ifndef _LINUX_RBTREE_AUGMENTED_H 25 | #define _LINUX_RBTREE_AUGMENTED_H 26 | 27 | #include "libs/rbtree.h" 28 | 29 | /* 30 | * Please note - only struct rb_augment_callbacks and the prototypes for 31 | * rb_insert_augmented() and rb_erase_augmented() are intended to be public. 32 | * The rest are implementation details you are not expected to depend on. 33 | * 34 | * See Documentation/rbtree.txt for documentation and samples. 35 | */ 36 | 37 | struct rb_augment_callbacks { 38 | void (*propagate)(struct rb_node *node, struct rb_node *stop); 39 | void (*copy)(struct rb_node *old, struct rb_node *new); 40 | void (*rotate)(struct rb_node *old, struct rb_node *new); 41 | }; 42 | 43 | extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, 44 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); 45 | /* 46 | * Fixup the rbtree and update the augmented information when rebalancing. 47 | * 48 | * On insertion, the user must update the augmented information on the path 49 | * leading to the inserted node, then call rb_link_node() as usual and 50 | * rb_augment_inserted() instead of the usual rb_insert_color() call. 51 | * If rb_augment_inserted() rebalances the rbtree, it will callback into 52 | * a user provided function to update the augmented information on the 53 | * affected subtrees. 
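 *
 * A minimal usage sketch (with a hypothetical node type that caches a
 * per-subtree maximum; not part of this project):
 *
 *   struct mynode { struct rb_node rb; u64 val; u64 subtree_max; };
 *
 *   RB_DECLARE_CALLBACKS(static, my_cb, struct mynode, rb,
 *                        u64, subtree_max, compute_subtree_max)
 *
 *   rb_link_node(&node->rb, parent, link);
 *   rb_insert_augmented(&node->rb, &tree, &my_cb);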
54 | */ 55 | static inline void 56 | rb_insert_augmented(struct rb_node *node, struct rb_root *root, 57 | const struct rb_augment_callbacks *augment) 58 | { 59 | __rb_insert_augmented(node, root, augment->rotate); 60 | } 61 | 62 | #define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ 63 | rbtype, rbaugmented, rbcompute) \ 64 | static inline void \ 65 | rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ 66 | { \ 67 | while (rb != stop) { \ 68 | rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ 69 | rbtype augmented = rbcompute(node); \ 70 | if (node->rbaugmented == augmented) \ 71 | break; \ 72 | node->rbaugmented = augmented; \ 73 | rb = rb_parent(&node->rbfield); \ 74 | } \ 75 | } \ 76 | static inline void \ 77 | rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ 78 | { \ 79 | rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ 80 | rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ 81 | new->rbaugmented = old->rbaugmented; \ 82 | } \ 83 | static void \ 84 | rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ 85 | { \ 86 | rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ 87 | rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ 88 | new->rbaugmented = old->rbaugmented; \ 89 | old->rbaugmented = rbcompute(old); \ 90 | } \ 91 | rbstatic const struct rb_augment_callbacks rbname = { \ 92 | rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ 93 | }; 94 | 95 | 96 | #define RB_RED 0 97 | #define RB_BLACK 1 98 | 99 | #define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) 100 | 101 | #define __rb_color(pc) ((pc) & 1) 102 | #define __rb_is_black(pc) __rb_color(pc) 103 | #define __rb_is_red(pc) (!__rb_color(pc)) 104 | #define rb_color(rb) __rb_color((rb)->__rb_parent_color) 105 | #define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) 106 | #define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) 107 | 108 | static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) 109 | { 110 | rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; 111 | } 112 | 113 | static inline void rb_set_parent_color(struct rb_node *rb, 114 | struct rb_node *p, int color) 115 | { 116 | rb->__rb_parent_color = (unsigned long)p | color; 117 | } 118 | 119 | static inline void 120 | __rb_change_child(struct rb_node *old, struct rb_node *new, 121 | struct rb_node *parent, struct rb_root *root) 122 | { 123 | if (parent) { 124 | if (parent->rb_left == old) 125 | parent->rb_left = new; 126 | else 127 | parent->rb_right = new; 128 | } else 129 | root->rb_node = new; 130 | } 131 | 132 | extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, 133 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); 134 | 135 | static struct rb_node * 136 | __rb_erase_augmented(struct rb_node *node, struct rb_root *root, 137 | const struct rb_augment_callbacks *augment) 138 | { 139 | struct rb_node *child = node->rb_right, *tmp = node->rb_left; 140 | struct rb_node *parent, *rebalance; 141 | unsigned long pc; 142 | 143 | if (!tmp) { 144 | /* 145 | * Case 1: node to erase has no more than 1 child (easy!) 146 | * 147 | * Note that if there is one child it must be red due to 5) 148 | * and node must be black due to 4). We adjust colors locally 149 | * so as to bypass __rb_erase_color() later on. 
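 *
 * (Properties 4) and 5) here are the classic red-black invariants: a red
 * node only has black children, and every path from a node down to its
 * leaves crosses the same number of black nodes.)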
150 | */ 151 | pc = node->__rb_parent_color; 152 | parent = __rb_parent(pc); 153 | __rb_change_child(node, child, parent, root); 154 | if (child) { 155 | child->__rb_parent_color = pc; 156 | rebalance = NULL; 157 | } else 158 | rebalance = __rb_is_black(pc) ? parent : NULL; 159 | tmp = parent; 160 | } else if (!child) { 161 | /* Still case 1, but this time the child is node->rb_left */ 162 | tmp->__rb_parent_color = pc = node->__rb_parent_color; 163 | parent = __rb_parent(pc); 164 | __rb_change_child(node, tmp, parent, root); 165 | rebalance = NULL; 166 | tmp = parent; 167 | } else { 168 | struct rb_node *successor = child, *child2; 169 | tmp = child->rb_left; 170 | if (!tmp) { 171 | /* 172 | * Case 2: node's successor is its right child 173 | * 174 | * (n) (s) 175 | * / \ / \ 176 | * (x) (s) -> (x) (c) 177 | * \ 178 | * (c) 179 | */ 180 | parent = successor; 181 | child2 = successor->rb_right; 182 | augment->copy(node, successor); 183 | } else { 184 | /* 185 | * Case 3: node's successor is leftmost under 186 | * node's right child subtree 187 | * 188 | * (n) (s) 189 | * / \ / \ 190 | * (x) (y) -> (x) (y) 191 | * / / 192 | * (p) (p) 193 | * / / 194 | * (s) (c) 195 | * \ 196 | * (c) 197 | */ 198 | do { 199 | parent = successor; 200 | successor = tmp; 201 | tmp = tmp->rb_left; 202 | } while (tmp); 203 | parent->rb_left = child2 = successor->rb_right; 204 | successor->rb_right = child; 205 | rb_set_parent(child, successor); 206 | augment->copy(node, successor); 207 | augment->propagate(parent, successor); 208 | } 209 | 210 | successor->rb_left = tmp = node->rb_left; 211 | rb_set_parent(tmp, successor); 212 | 213 | pc = node->__rb_parent_color; 214 | tmp = __rb_parent(pc); 215 | __rb_change_child(node, successor, tmp, root); 216 | if (child2) { 217 | successor->__rb_parent_color = pc; 218 | rb_set_parent_color(child2, parent, RB_BLACK); 219 | rebalance = NULL; 220 | } else { 221 | unsigned long pc2 = successor->__rb_parent_color; 222 | successor->__rb_parent_color = pc; 223 | rebalance = __rb_is_black(pc2) ? 
parent : NULL; 224 | } 225 | tmp = successor; 226 | } 227 | 228 | augment->propagate(tmp, NULL); 229 | return rebalance; 230 | } 231 | 232 | static inline void 233 | rb_erase_augmented(struct rb_node *node, struct rb_root *root, 234 | const struct rb_augment_callbacks *augment) 235 | { 236 | struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); 237 | if (rebalance) 238 | __rb_erase_color(rebalance, root, augment->rotate); 239 | } 240 | 241 | #endif /* _LINUX_RBTREE_AUGMENTED_H */ 242 | -------------------------------------------------------------------------------- /inode.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #include 4 | #include 5 | #include 6 | #include "inode.h" 7 | #include "ctree.h" 8 | #include "metadata.h" 9 | #include "hash.h" 10 | #include "messages.h" 11 | 12 | int btrfs_lookup_one_name(struct btrfs_fs_info *fs_info, 13 | struct btrfs_inode *dir, const char *name, 14 | size_t name_len, struct btrfs_inode *inode_ret) 15 | { 16 | struct btrfs_dir_item *di; 17 | struct btrfs_root *root; 18 | struct btrfs_key key; 19 | struct btrfs_path path; 20 | u64 ino; 21 | u32 cur; 22 | u32 item_start; 23 | u32 item_len; 24 | u8 file_type; 25 | bool found = false; 26 | int ret; 27 | 28 | if (dir->file_type != BTRFS_FT_DIR) 29 | return -ENOTDIR; 30 | 31 | btrfs_init_path(&path); 32 | key.objectid = dir->ino; 33 | key.type = BTRFS_DIR_ITEM_KEY; 34 | key.offset = btrfs_name_hash(name, name_len); 35 | 36 | ret = btrfs_search_key(dir->root, &path, &key); 37 | if (ret < 0) { 38 | btrfs_release_path(&path); 39 | return ret; 40 | } 41 | 42 | item_start = btrfs_item_ptr_offset(path.nodes[0], path.slots[0]); 43 | item_len = btrfs_item_size_nr(path.nodes[0], path.slots[0]); 44 | cur = item_start; 45 | /* 46 | * We can have name hash conflicts, thus still need to verify the 47 | * found dir_item one by one. 
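 * (All names with the same crc32c hash share a single DIR_ITEM item at
 * key (dir ino, DIR_ITEM, hash); the item body is a sequence of
 * btrfs_dir_item headers, each immediately followed by its name, so the
 * loop below walks the sub-items and compares each candidate name.)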
48 | */ 49 | while (cur < item_start + item_len) { 50 | u32 name_ptr; 51 | u32 this_item_size; 52 | 53 | di = (struct btrfs_dir_item *)(long)cur; 54 | this_item_size = sizeof(*di) + 55 | btrfs_dir_data_len(path.nodes[0], di) + 56 | btrfs_dir_name_len(path.nodes[0], di); 57 | 58 | if (cur + this_item_size > item_start + item_len) { 59 | error( 60 | "invalid dir item size, cur=%u dir_item size=%u item start=%u item len=%u", 61 | cur, this_item_size, item_start, item_len); 62 | return -EUCLEAN; 63 | } 64 | 65 | cur = (u32)(long)(di + 1); 66 | name_ptr = cur; 67 | 68 | if (btrfs_dir_name_len(path.nodes[0], di) == name_len && 69 | !memcmp_extent_buffer(path.nodes[0], name, name_ptr, name_len)) { 70 | found = true; 71 | break; 72 | } 73 | cur += btrfs_dir_name_len(path.nodes[0], di); 74 | } 75 | if (!found) { 76 | btrfs_release_path(&path); 77 | return -ENOENT; 78 | } 79 | 80 | /* Found the dir item we want, extract root/ino from it */ 81 | btrfs_dir_item_key_to_cpu(path.nodes[0], di, &key); 82 | if (key.type == BTRFS_ROOT_ITEM_KEY) { 83 | root = btrfs_read_root(fs_info, key.objectid); 84 | if (IS_ERR(root)) { 85 | ret = PTR_ERR(root); 86 | btrfs_release_path(&path); 87 | return ret; 88 | } 89 | ino = root->root_dirid; 90 | file_type = BTRFS_FT_DIR; 91 | } else if (key.type == BTRFS_INODE_ITEM_KEY){ 92 | root = dir->root; 93 | ino = key.objectid; 94 | file_type = btrfs_dir_type(path.nodes[0], di); 95 | } else { 96 | error("invalid dir item key found: (%llu %u %llu)", 97 | key.objectid, key.type, key.offset); 98 | btrfs_release_path(&path); 99 | return -EUCLEAN; 100 | } 101 | btrfs_release_path(&path); 102 | 103 | inode_ret->root = root; 104 | inode_ret->ino = ino; 105 | inode_ret->file_type = file_type; 106 | return 0; 107 | } 108 | 109 | int btrfs_resolve_path(struct btrfs_fs_info *fs_info, 110 | const char *path, size_t path_len, 111 | struct btrfs_inode *inode_ret) 112 | { 113 | struct btrfs_inode cur_inode; 114 | struct btrfs_inode next_inode; 115 | size_t offset = 0; 116 | char *slash_char; 117 | int ret; 118 | 119 | cur_inode.root = fs_info->default_root; 120 | cur_inode.ino = fs_info->default_root->root_dirid; 121 | cur_inode.file_type = BTRFS_FT_DIR; 122 | 123 | while (offset < path_len) { 124 | u32 name_len; 125 | 126 | /* Skip any '/' in the path*/ 127 | if (path[offset] == '/') { 128 | offset++; 129 | continue; 130 | } 131 | 132 | /* Extract the next filename to resolve */ 133 | slash_char = memchr(path + offset, '/', path_len - offset); 134 | if (slash_char) { 135 | name_len = slash_char - (path + offset); 136 | } else { 137 | /* Last name, no more '/' */ 138 | name_len = path_len - offset; 139 | } 140 | 141 | ret = btrfs_lookup_one_name(fs_info, &cur_inode, path + offset, 142 | name_len, &next_inode); 143 | if (ret < 0) 144 | return ret; 145 | 146 | memcpy(&cur_inode, &next_inode, sizeof(next_inode)); 147 | offset += name_len; 148 | } 149 | 150 | memcpy(inode_ret, &cur_inode, sizeof(cur_inode)); 151 | return 0; 152 | } 153 | 154 | int btrfs_read_link(struct btrfs_fs_info *fs_info, 155 | struct btrfs_inode *inode, char *output, 156 | size_t output_size) 157 | { 158 | struct btrfs_file_extent_item *fi; 159 | struct btrfs_path path; 160 | struct btrfs_key key; 161 | u32 read_size; 162 | int ret; 163 | 164 | ASSERT(inode->file_type == BTRFS_FT_SYMLINK); 165 | 166 | btrfs_init_path(&path); 167 | key.objectid = inode->ino; 168 | key.type = BTRFS_EXTENT_DATA_KEY; 169 | key.offset = 0; 170 | 171 | ret = btrfs_search_key(inode->root, &path, &key); 172 | if (ret < 0) { 173 | 
btrfs_release_path(&path); 174 | return ret; 175 | } 176 | 177 | fi = btrfs_item_ptr(path.nodes[0], path.slots[0], 178 | struct btrfs_file_extent_item); 179 | if (btrfs_file_extent_type(path.nodes[0], fi) != 180 | BTRFS_FILE_EXTENT_INLINE) { 181 | error("invalid file extent type, has %u expect %u", 182 | btrfs_file_extent_type(path.nodes[0], fi), 183 | BTRFS_FILE_EXTENT_INLINE); 184 | btrfs_release_path(&path); 185 | return -EUCLEAN; 186 | } 187 | if (btrfs_file_extent_compression(path.nodes[0], fi) != 188 | BTRFS_COMPRESS_NONE) { 189 | error("invalid file extent compression, has %u expect %u", 190 | btrfs_file_extent_compression(path.nodes[0], fi), 191 | BTRFS_COMPRESS_NONE); 192 | btrfs_release_path(&path); 193 | return -EUCLEAN; 194 | } 195 | if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) == 0) { 196 | error("empty link length"); 197 | btrfs_release_path(&path); 198 | return -EUCLEAN; 199 | } 200 | if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) >= PATH_MAX) { 201 | error("invalid link length, has %llu max %u", 202 | btrfs_file_extent_ram_bytes(path.nodes[0], fi), 203 | PATH_MAX); 204 | btrfs_release_path(&path); 205 | return -ENAMETOOLONG; 206 | } 207 | read_size = MIN(btrfs_file_extent_ram_bytes(path.nodes[0], fi), 208 | output_size - 1); 209 | read_extent_buffer(path.nodes[0], output, 210 | btrfs_file_extent_inline_start(fi), 211 | read_size); 212 | output[read_size] = '\0'; 213 | btrfs_release_path(&path); 214 | return read_size; 215 | } 216 | 217 | int btrfs_iterate_dir_start(struct btrfs_fs_info *fs_info, 218 | struct btrfs_iterate_dir_ctrl *ctrl, 219 | const struct btrfs_inode *dir, u64 start_index) 220 | { 221 | struct btrfs_key_range *range = &ctrl->range; 222 | 223 | ASSERT(dir->file_type == BTRFS_FT_DIR); 224 | 225 | range->objectid = dir->ino; 226 | range->type_start = range->type_end = BTRFS_DIR_INDEX_KEY; 227 | range->offset_start = start_index; 228 | range->offset_end = (u64)-1; 229 | 230 | ctrl->dir.ino = dir->ino; 231 | ctrl->dir.root = dir->root; 232 | ctrl->dir.file_type = dir->file_type; 233 | btrfs_init_path(&ctrl->path); 234 | 235 | return btrfs_search_keys_start(ctrl->dir.root, &ctrl->path, &ctrl->range); 236 | } 237 | 238 | int btrfs_iterate_dir_get_inode(struct btrfs_fs_info *fs_info, 239 | struct btrfs_iterate_dir_ctrl *ctrl, 240 | struct btrfs_inode *entry, 241 | u64 *index_ret, char *name, size_t *name_len) 242 | { 243 | struct btrfs_dir_item *di; 244 | struct btrfs_key key; 245 | 246 | btrfs_item_key_to_cpu(ctrl->path.nodes[0], &key, ctrl->path.slots[0]); 247 | ASSERT(key.type == BTRFS_DIR_INDEX_KEY); 248 | 249 | if (index_ret) 250 | *index_ret = key.offset; 251 | 252 | di = btrfs_item_ptr(ctrl->path.nodes[0], ctrl->path.slots[0], 253 | struct btrfs_dir_item); 254 | btrfs_dir_item_key_to_cpu(ctrl->path.nodes[0], di, &key); 255 | 256 | if (key.type == BTRFS_INODE_ITEM_KEY) { 257 | entry->root = ctrl->dir.root; 258 | entry->ino = key.objectid; 259 | } else { 260 | struct btrfs_root *root; 261 | root = btrfs_read_root(fs_info, key.objectid); 262 | if (IS_ERR(root)) 263 | return PTR_ERR(root); 264 | 265 | entry->root = root; 266 | entry->ino = root->root_dirid; 267 | } 268 | entry->file_type = btrfs_dir_type(ctrl->path.nodes[0], di); 269 | 270 | *name_len = btrfs_dir_name_len(ctrl->path.nodes[0], di); 271 | read_extent_buffer(ctrl->path.nodes[0], name, (unsigned long)(di + 1), 272 | *name_len); 273 | return 0; 274 | } 275 | 276 | int btrfs_stat(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode, 277 | struct stat *stbuf) 278 | { 279 | struct 
btrfs_inode_item *ii; 280 | struct extent_buffer *leaf; 281 | struct btrfs_path path; 282 | struct btrfs_key key; 283 | int ret; 284 | 285 | btrfs_init_path(&path); 286 | key.objectid = inode->ino; 287 | key.type = BTRFS_INODE_ITEM_KEY; 288 | key.offset = 0; 289 | 290 | ret = btrfs_search_key(inode->root, &path, &key); 291 | if (ret < 0) 292 | return ret; 293 | 294 | leaf = path.nodes[0]; 295 | ii = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_inode_item); 296 | stbuf->st_blksize = fs_info->sectorsize; 297 | stbuf->st_blocks = btrfs_inode_nbytes(leaf, ii) >> SECTOR_SHIFT; 298 | stbuf->st_size = btrfs_inode_size(leaf, ii); 299 | stbuf->st_gid = btrfs_inode_gid(leaf, ii); 300 | stbuf->st_uid = btrfs_inode_uid(leaf, ii); 301 | 302 | stbuf->st_ino = inode->ino; 303 | stbuf->st_nlink = btrfs_inode_nlink(leaf, ii); 304 | stbuf->st_mode = btrfs_inode_mode(leaf, ii); 305 | stbuf->st_atim.tv_sec = btrfs_timespec_sec(leaf, btrfs_inode_atime(ii)); 306 | stbuf->st_atim.tv_nsec = btrfs_timespec_nsec(leaf, btrfs_inode_atime(ii)); 307 | stbuf->st_ctim.tv_sec = btrfs_timespec_sec(leaf, btrfs_inode_ctime(ii)); 308 | stbuf->st_ctim.tv_nsec = btrfs_timespec_nsec(leaf, btrfs_inode_ctime(ii)); 309 | stbuf->st_mtim.tv_sec = btrfs_timespec_sec(leaf, btrfs_inode_mtime(ii)); 310 | stbuf->st_mtim.tv_nsec = btrfs_timespec_nsec(leaf, btrfs_inode_mtime(ii)); 311 | /* 312 | * We don't have a good way to emulate the anonymous device numbers 313 | * the kernel uses, thus here we just use the subvolume id. 314 | */ 315 | stbuf->st_dev = inode->root->root_key.objectid; 316 | btrfs_release_path(&path); 317 | return 0; 318 | } 319 | -------------------------------------------------------------------------------- /super.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #include <errno.h> 4 | #include <stdlib.h> 5 | #include <unistd.h> 6 | #include <uuid/uuid.h> 7 | #include "ondisk_format.h" 8 | #include "super.h" 9 | #include "messages.h" 10 | #include "hash.h" 11 | #include "volumes.h" 12 | #include "metadata.h" 13 | 14 | int btrfs_read_from_disk(int fd, char *buf, u64 offset, u32 len) 15 | { 16 | int cur = 0; 17 | 18 | while (cur < len) { 19 | int ret; 20 | 21 | ret = pread(fd, buf + cur, len - cur, offset + cur); 22 | if (ret <= 0) { 23 | ret = (ret < 0) ? -errno : -EIO; 24 | return ret; 25 | } 26 | cur += ret; 27 | } 28 | return len; 29 | } 30 | 31 | int btrfs_check_super(struct btrfs_super_block *sb) 32 | { 33 | u8 result[BTRFS_CSUM_SIZE]; 34 | u32 sectorsize; 35 | u32 nodesize; 36 | u16 csum_type; 37 | int csum_size; 38 | 39 | if (btrfs_super_magic(sb) != BTRFS_MAGIC) 40 | return -EINVAL; 41 | 42 | csum_type = btrfs_super_csum_type(sb); 43 | if (csum_type >= btrfs_super_num_csums()) { 44 | error("unsupported checksum algorithm %u", csum_type); 45 | return -EIO; 46 | } 47 | csum_size = btrfs_super_csum_size(sb); 48 | 49 | btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE, 50 | result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); 51 | 52 | if (memcmp(result, sb->csum, csum_size)) { 53 | error("superblock checksum mismatch"); 54 | return -EIO; 55 | } 56 | 57 | /* We don't support seeding, metadump or FSID-change super flags yet */ 58 | if (btrfs_super_flags(sb) & ~BTRFS_HEADER_FLAG_WRITTEN) { 59 | error("unsupported super flags: %llx", btrfs_super_flags(sb)); 60 | goto error_out; 61 | } 62 | 63 | /* Root level checks */ 64 | if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { 65 | error("tree_root level too big: %d >= %d", 66 | btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); 67 | goto
error_out; 68 | } 69 | if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { 70 | error("chunk_root level too big: %d >= %d", 71 | btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); 72 | goto error_out; 73 | } 74 | if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { 75 | error("log_root level too big: %d >= %d", 76 | btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); 77 | goto error_out; 78 | } 79 | 80 | /* Sectorsize/nodesize checks */ 81 | sectorsize = btrfs_super_sectorsize(sb); 82 | nodesize = btrfs_super_nodesize(sb); 83 | 84 | if (!is_power_of_2(sectorsize) || sectorsize > BTRFS_SECTORSIZE_MAX || 85 | sectorsize < BTRFS_SECTORSIZE_MIN) { 86 | error("invalid sectorsize: %u", sectorsize); 87 | goto error_out; 88 | } 89 | if (!is_power_of_2(nodesize) || nodesize > BTRFS_NODESIZE_MAX || 90 | nodesize < BTRFS_NODESIZE_MIN || nodesize < sectorsize) { 91 | error("invalid nodesize: %u", nodesize); 92 | goto error_out; 93 | } 94 | 95 | /* 96 | * Root alignment check 97 | * 98 | * We may have a rare case where a chunk is sectorsize aligned but not 99 | * nodesize aligned. 100 | * In that case, we only require sectorsize alignment. 101 | */ 102 | if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) { 103 | error("tree_root block unaligned: %llu", btrfs_super_root(sb)); 104 | goto error_out; 105 | } 106 | if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) { 107 | error("chunk_root block unaligned: %llu", 108 | btrfs_super_chunk_root(sb)); 109 | goto error_out; 110 | } 111 | if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) { 112 | error("log_root block unaligned: %llu", 113 | btrfs_super_log_root(sb)); 114 | goto error_out; 115 | } 116 | 117 | /* Basic size check */ 118 | if (btrfs_super_total_bytes(sb) == 0) { 119 | error("invalid total_bytes 0"); 120 | goto error_out; 121 | } 122 | if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { 123 | error("invalid bytes_used %llu", btrfs_super_bytes_used(sb)); 124 | goto error_out; 125 | } 126 | 127 | if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { 128 | char fsid[BTRFS_UUID_UNPARSED_SIZE]; 129 | char dev_fsid[BTRFS_UUID_UNPARSED_SIZE]; 130 | 131 | uuid_unparse(sb->fsid, fsid); 132 | uuid_unparse(sb->dev_item.fsid, dev_fsid); 133 | error("dev_item UUID does not match fsid: %s != %s", 134 | dev_fsid, fsid); 135 | goto error_out; 136 | } 137 | 138 | /* 139 | * Hint to catch really bogus numbers, bitflips or so 140 | */ 141 | if (btrfs_super_num_devices(sb) > (1UL << 31)) { 142 | warning("suspicious number of devices: %llu", 143 | btrfs_super_num_devices(sb)); 144 | } 145 | 146 | if (btrfs_super_num_devices(sb) == 0) { 147 | error("number of devices is 0"); 148 | goto error_out; 149 | } 150 | 151 | /* 152 | * Obvious sys_chunk_array corruptions, it must hold at least one key 153 | * and one chunk 154 | */ 155 | if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { 156 | error("system chunk array too big %u > %u", 157 | btrfs_super_sys_array_size(sb), 158 | BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); 159 | goto error_out; 160 | } 161 | if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) 162 | + sizeof(struct btrfs_chunk)) { 163 | error("system chunk array too small %u < %zu", 164 | btrfs_super_sys_array_size(sb), 165 | sizeof(struct btrfs_disk_key) + 166 | sizeof(struct btrfs_chunk)); 167 | goto error_out; 168 | } 169 | 170 | return 0; 171 | 172 | error_out: 173 | error("superblock checksum matches but it has invalid members"); 174 | return -EIO; 175 | } 176 | 177 | static void free_root(struct
btrfs_root *root) 178 | { 179 | if (!root || IS_ERR(root)) 180 | return; 181 | free_extent_buffer(root->node); 182 | free(root); 183 | } 184 | 185 | static void free_chunk_maps(struct btrfs_fs_info *fs_info) 186 | { 187 | struct btrfs_chunk_map *map; 188 | struct btrfs_chunk_map *tmp; 189 | 190 | rbtree_postorder_for_each_entry_safe(map, tmp, &fs_info->mapping_root, 191 | node) 192 | free(map); 193 | } 194 | 195 | void btrfs_unmount(struct btrfs_fs_info *fs_info) 196 | { 197 | struct btrfs_root *root; 198 | struct btrfs_root *tmp; 199 | struct btrfs_device *dev; 200 | 201 | rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->subvols_root, 202 | rb_node) 203 | free_root(root); 204 | 205 | free_root(fs_info->csum_root); 206 | free_root(fs_info->tree_root); 207 | free_root(fs_info->chunk_root); 208 | 209 | /* 210 | * At this stage all extent buffers should have been freed; this is 211 | * just to catch unreleased ones. 212 | */ 213 | if (!RB_EMPTY_ROOT(&fs_info->eb_root)) { 214 | struct extent_buffer *eb; 215 | struct extent_buffer *tmp; 216 | warning("unreleased extent buffers detected"); 217 | 218 | rbtree_postorder_for_each_entry_safe(eb, tmp, &fs_info->eb_root, 219 | node) { 220 | warning("eb %llu unreleased", eb->start); 221 | free(eb); 222 | } 223 | } 224 | 225 | /* Now free the chunk maps */ 226 | free_chunk_maps(fs_info); 227 | 228 | if (!fs_info->fs_devices) 229 | goto out; 230 | 231 | /* Finally close all devices */ 232 | list_for_each_entry(dev, &fs_info->fs_devices->dev_list, list) { 233 | if (dev->fd >= 0) { 234 | close(dev->fd); 235 | dev->fd = -1; 236 | } 237 | } 238 | out: 239 | free(fs_info); 240 | } 241 | 242 | static struct btrfs_root *read_default_root(struct btrfs_fs_info *fs_info) 243 | { 244 | struct btrfs_key_range range; 245 | struct btrfs_dir_item *di; 246 | struct btrfs_path path; 247 | struct btrfs_key key; 248 | int ret; 249 | 250 | btrfs_init_path(&path); 251 | range.objectid = BTRFS_ROOT_TREE_DIR_OBJECTID; 252 | range.type_start = range.type_end = BTRFS_DIR_ITEM_KEY; 253 | range.offset_start = 0; 254 | range.offset_end = (u64)-1; 255 | 256 | ret = btrfs_search_keys_start(fs_info->tree_root, &path, &range); 257 | if (ret < 0) 258 | return ERR_PTR(ret); 259 | di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item); 260 | btrfs_dir_item_key_to_cpu(path.nodes[0], di, &key); 261 | btrfs_release_path(&path); 262 | 263 | ASSERT(is_fstree(key.objectid)); 264 | return btrfs_read_root(fs_info, key.objectid); 265 | } 266 | 267 | struct btrfs_fs_info *btrfs_mount(const char *path) 268 | { 269 | struct btrfs_fs_info *fs_info; 270 | int ret; 271 | 272 | fs_info = calloc(1, sizeof(*fs_info)); 273 | if (!fs_info) 274 | return ERR_PTR(-ENOMEM); 275 | 276 | pthread_mutex_init(&fs_info->eb_lock, NULL); 277 | /* Check if there is btrfs on the device */ 278 | ret = btrfs_scan_device(path, &fs_info->super_copy); 279 | if (ret < 0) { 280 | if (ret == -EINVAL) 281 | error("no btrfs found at %s", path); 282 | else 283 | error("failed to scan device %s: %d", path, ret); 284 | goto error; 285 | } 286 | fs_info->sectorsize = btrfs_super_sectorsize(&fs_info->super_copy); 287 | fs_info->nodesize = btrfs_super_nodesize(&fs_info->super_copy); 288 | fs_info->csum_type = btrfs_super_csum_type(&fs_info->super_copy); 289 | fs_info->csum_size = btrfs_super_csum_size(&fs_info->super_copy); 290 | memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_UUID_SIZE); 291 | 292 | /* Now open all involved devices of the fs */ 293 | fs_info->fs_devices = btrfs_open_devices(fs_info); 294 | if
(IS_ERR(fs_info->fs_devices)) { 295 | ret = PTR_ERR(fs_info->fs_devices); 296 | error("failed to grab fs_devs: %d", ret); 297 | goto error; 298 | } 299 | 300 | /* Then read the system chunk array */ 301 | ret = btrfs_read_sys_chunk_array(fs_info); 302 | if (ret < 0) { 303 | error("failed to read system chunk array: %d", ret); 304 | goto error; 305 | } 306 | 307 | /* Now we can read the chunk root */ 308 | fs_info->chunk_root = btrfs_read_root(fs_info, 309 | BTRFS_CHUNK_TREE_OBJECTID); 310 | if (IS_ERR(fs_info->chunk_root)) { 311 | ret = PTR_ERR(fs_info->chunk_root); 312 | error("failed to read chunk root: %d", ret); 313 | goto error; 314 | } 315 | 316 | /* Then iterate the whole chunk tree */ 317 | ret = btrfs_read_chunk_tree(fs_info); 318 | if (ret < 0) { 319 | error("failed to iterate chunk tree: %d", ret); 320 | goto error; 321 | } 322 | 323 | /* Read the remaining trees */ 324 | fs_info->tree_root = btrfs_read_root(fs_info, BTRFS_ROOT_TREE_OBJECTID); 325 | if (IS_ERR(fs_info->tree_root)) { 326 | ret = PTR_ERR(fs_info->tree_root); 327 | error("failed to read tree root: %d", ret); 328 | goto error; 329 | } 330 | fs_info->csum_root = btrfs_read_root(fs_info, BTRFS_CSUM_TREE_OBJECTID); 331 | if (IS_ERR(fs_info->csum_root)) { 332 | ret = PTR_ERR(fs_info->csum_root); 333 | error("failed to read csum root: %d", ret); 334 | goto error; 335 | } 336 | fs_info->default_root = read_default_root(fs_info); 337 | if (IS_ERR(fs_info->default_root)) { 338 | ret = PTR_ERR(fs_info->default_root); 339 | error("failed to read default root: %d", ret); 340 | goto error; 341 | } 342 | return fs_info; 343 | error: 344 | btrfs_unmount(fs_info); 345 | return ERR_PTR(ret); 346 | } 347 | -------------------------------------------------------------------------------- /libs/raid56.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0-only 2 | 3 | /* Cross-ported from btrfs-progs, which is further cross-ported from kernel. */ 4 | 5 | /* -*- linux-c -*- ------------------------------------------------------- * 6 | * 7 | * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved 8 | * 9 | * This program is free software; you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License as published by 11 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, 12 | * Boston MA 02111-1307, USA; either version 2 of the License, or 13 | * (at your option) any later version; incorporated herein by reference. 14 | * 15 | * ----------------------------------------------------------------------- */ 16 | 17 | /* 18 | * Added helpers for unaligned native int access 19 | */ 20 | 21 | /* 22 | * raid6int1.c 23 | * 24 | * 1-way unrolled portable integer math RAID-6 instruction set 25 | * 26 | * This file was postprocessed using unroll.pl and then ported to userspace 27 | */ 28 | #include <stdlib.h> 29 | #include <string.h> 30 | #include "compat.h" 31 | #include "ondisk_format.h" 32 | #include "messages.h" 33 | #include "libs/raid56.h" 34 | 35 | /* 36 | * This is the C data type to use 37 | */ 38 | 39 | /* Change this from BITS_PER_LONG if there is something better...
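 * (unative_t lets raid6_gen_syndrome() process one machine word per step;
 * NBYTES(0x1d) in that loop replicates 0x1d, the low byte of the RAID-6
 * generator polynomial x^8 + x^4 + x^3 + x^2 + 1, across every byte lane
 * for the word-parallel GF(256) multiply-by-2.)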
*/ 40 | #if BITS_PER_LONG == 64 41 | # define NBYTES(x) ((x) * 0x0101010101010101UL) 42 | # define NSIZE 8 43 | # define NSHIFT 3 44 | typedef uint64_t unative_t; 45 | #define put_unaligned_native(val,p) put_unaligned_64((val),(p)) 46 | #define get_unaligned_native(p) get_unaligned_64((p)) 47 | #else 48 | # define NBYTES(x) ((x) * 0x01010101U) 49 | # define NSIZE 4 50 | # define NSHIFT 2 51 | typedef uint32_t unative_t; 52 | #define put_unaligned_native(val,p) put_unaligned_32((val),(p)) 53 | #define get_unaligned_native(p) get_unaligned_32((p)) 54 | #endif 55 | 56 | /* 57 | * These sub-operations are separate inlines since they can sometimes be 58 | * specially optimized using architecture-specific hacks. 59 | */ 60 | 61 | /* 62 | * The SHLBYTE() operation shifts each byte left by 1, *not* 63 | * rolling over into the next byte 64 | */ 65 | static inline __attribute_const__ unative_t SHLBYTE(unative_t v) 66 | { 67 | unative_t vv; 68 | 69 | vv = (v << 1) & NBYTES(0xfe); 70 | return vv; 71 | } 72 | 73 | /* 74 | * The MASK() operation returns 0xFF in any byte for which the high 75 | * bit is 1, 0x00 for any byte for which the high bit is 0. 76 | */ 77 | static inline __attribute_const__ unative_t MASK(unative_t v) 78 | { 79 | unative_t vv; 80 | 81 | vv = v & NBYTES(0x80); 82 | vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ 83 | return vv; 84 | } 85 | 86 | 87 | void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs) 88 | { 89 | uint8_t **dptr = (uint8_t **)ptrs; 90 | uint8_t *p, *q; 91 | int d, z, z0; 92 | 93 | unative_t wd0, wq0, wp0, w10, w20; 94 | 95 | z0 = disks - 3; /* Highest data disk */ 96 | p = dptr[z0+1]; /* XOR parity */ 97 | q = dptr[z0+2]; /* RS syndrome */ 98 | 99 | for ( d = 0 ; d < bytes ; d += NSIZE*1 ) { 100 | wq0 = wp0 = get_unaligned_native(&dptr[z0][d+0*NSIZE]); 101 | for ( z = z0-1 ; z >= 0 ; z-- ) { 102 | wd0 = get_unaligned_native(&dptr[z][d+0*NSIZE]); 103 | wp0 ^= wd0; 104 | w20 = MASK(wq0); 105 | w10 = SHLBYTE(wq0); 106 | w20 &= NBYTES(0x1d); 107 | w10 ^= w20; 108 | wq0 = w10 ^ wd0; 109 | } 110 | put_unaligned_native(wp0, &p[d+NSIZE*0]); 111 | put_unaligned_native(wq0, &q[d+NSIZE*0]); 112 | } 113 | } 114 | 115 | static void xor_range(char *dst, const char *src, size_t size) 116 | { 117 | /* Move to DWORD aligned */ 118 | while (size && ((unsigned long)dst & (sizeof(unsigned long) - 1))) { 119 | *dst++ ^= *src++; 120 | size--; 121 | } 122 | 123 | /* DWORD aligned part */ 124 | while (size >= sizeof(unsigned long)) { 125 | *(unsigned long *)dst ^= *(unsigned long *)src; 126 | src += sizeof(unsigned long); 127 | dst += sizeof(unsigned long); 128 | size -= sizeof(unsigned long); 129 | } 130 | /* Remaining */ 131 | while (size) { 132 | *dst++ ^= *src++; 133 | size--; 134 | } 135 | } 136 | 137 | /* 138 | * Generate desired data/parity stripe for RAID5 139 | * 140 | * @nr_devs: Total number of devices, including parity 141 | * @stripe_len: Stripe length 142 | * @data: Data, with special layout: 143 | * data[0]: Data stripe 0 144 | * data[nr_devs-2]: Last data stripe 145 | * data[nr_devs-1]: RAID5 parity 146 | * @dest: The index of the stripe to generate; it
should follow above data layout 147 | */ 148 | int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data) 149 | { 150 | int i; 151 | char *buf = data[dest]; 152 | 153 | /* Validation check */ 154 | if (stripe_len <= 0 || stripe_len != BTRFS_STRIPE_LEN) { 155 | error("invalid parameter for %s", __func__); 156 | return -EINVAL; 157 | } 158 | 159 | if (dest >= nr_devs || nr_devs < 2) { 160 | error("invalid parameter for %s", __func__); 161 | return -EINVAL; 162 | } 163 | /* Shortcut for 2 devs RAID5, which is just RAID1 */ 164 | if (nr_devs == 2) { 165 | memcpy(data[dest], data[1 - dest], stripe_len); 166 | return 0; 167 | } 168 | memset(buf, 0, stripe_len); 169 | for (i = 0; i < nr_devs; i++) { 170 | if (i == dest) 171 | continue; 172 | xor_range(buf, data[i], stripe_len); 173 | } 174 | return 0; 175 | } 176 | 177 | /* 178 | * Raid 6 recovery code copied from kernel lib/raid6/recov.c. 179 | * With modifications: 180 | * - rename from raid6_2data_recov_intx1 181 | * - kfree/free modification for btrfs-progs 182 | */ 183 | int raid6_recov_data2(int nr_devs, size_t stripe_len, int dest1, int dest2, 184 | void **data) 185 | { 186 | u8 *p, *q, *dp, *dq; 187 | u8 px, qx, db; 188 | const u8 *pbmul; /* P multiplier table for B data */ 189 | const u8 *qmul; /* Q multiplier table (for both) */ 190 | char *zero_mem1, *zero_mem2; 191 | int ret = 0; 192 | 193 | /* Early check */ 194 | if (dest1 < 0 || dest1 >= nr_devs - 2 || 195 | dest2 < 0 || dest2 >= nr_devs - 2 || dest1 >= dest2) 196 | return -EINVAL; 197 | 198 | zero_mem1 = calloc(1, stripe_len); 199 | zero_mem2 = calloc(1, stripe_len); 200 | if (!zero_mem1 || !zero_mem2) { 201 | free(zero_mem1); 202 | free(zero_mem2); 203 | return -ENOMEM; 204 | } 205 | 206 | p = (u8 *)data[nr_devs - 2]; 207 | q = (u8 *)data[nr_devs - 1]; 208 | 209 | /* Compute syndrome with zero for the missing data pages 210 | Use the dead data pages as temporary storage for 211 | delta p and delta q */ 212 | dp = (u8 *)data[dest1]; 213 | data[dest1] = (void *)zero_mem1; 214 | data[nr_devs - 2] = dp; 215 | dq = (u8 *)data[dest2]; 216 | data[dest2] = (void *)zero_mem2; 217 | data[nr_devs - 1] = dq; 218 | 219 | raid6_gen_syndrome(nr_devs, stripe_len, data); 220 | 221 | /* Restore pointer table */ 222 | data[dest1] = dp; 223 | data[dest2] = dq; 224 | data[nr_devs - 2] = p; 225 | data[nr_devs - 1] = q; 226 | 227 | /* Now, pick the proper data tables */ 228 | pbmul = raid6_gfmul[raid6_gfexi[dest2 - dest1]]; 229 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]^raid6_gfexp[dest2]]]; 230 | 231 | /* Now do it... 
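 * A sketch of the math behind the loop below (in GF(2^8), with a = dest1 and b = dest2): computing the syndromes with A and B zeroed gives px = P ^ P' = A ^ B and Q ^ Q' = g^a*A ^ g^b*B. Eliminating A yields B = qmul[Q ^ Q'] ^ pbmul[px], where qmul divides by (g^a ^ g^b) and pbmul multiplies by g^a / (g^a ^ g^b); A is then simply B ^ px.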
*/ 232 | while ( stripe_len-- ) { 233 | px = *p ^ *dp; 234 | qx = qmul[*q ^ *dq]; 235 | *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ 236 | *dp++ = db ^ px; /* Reconstructed A */ 237 | p++; q++; 238 | } 239 | 240 | free(zero_mem1); 241 | free(zero_mem2); 242 | return ret; 243 | } 244 | 245 | /* 246 | * Raid 6 recovery code copied from kernel lib/raid6/recov.c 247 | * - rename from raid6_datap_recov_intx1() 248 | * - parameter changed from faila to dest1 249 | */ 250 | int raid6_recov_datap(int nr_devs, size_t stripe_len, int dest1, void **data) 251 | { 252 | u8 *p, *q, *dq; 253 | const u8 *qmul; /* Q multiplier table */ 254 | char *zero_mem; 255 | 256 | p = (u8 *)data[nr_devs - 2]; 257 | q = (u8 *)data[nr_devs - 1]; 258 | 259 | zero_mem = calloc(1, stripe_len); 260 | if (!zero_mem) 261 | return -ENOMEM; 262 | 263 | /* Compute syndrome with zero for the missing data page 264 | Use the dead data page as temporary storage for delta q */ 265 | dq = (u8 *)data[dest1]; 266 | data[dest1] = (void *)zero_mem; 267 | data[nr_devs - 1] = dq; 268 | 269 | raid6_gen_syndrome(nr_devs, stripe_len, data); 270 | 271 | /* Restore pointer table */ 272 | data[dest1] = dq; 273 | data[nr_devs - 1] = q; 274 | 275 | /* Now, pick the proper data tables */ 276 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]]]; 277 | 278 | /* Now do it... */ 279 | while ( stripe_len-- ) { 280 | *p++ ^= *dq = qmul[*q ^ *dq]; 281 | q++; dq++; 282 | } 283 | free(zero_mem); return 0; 284 | } 285 | 286 | static int get_raid56_devs_min(u64 profile) 287 | { 288 | /* Btrfs supports 2 device RAID5, even though it's the same as RAID1 */ 289 | if (profile & BTRFS_BLOCK_GROUP_RAID5) 290 | return 2; 291 | 292 | /* The same applies to RAID6, thus 3 devices RAID6 is also allowed */ 293 | if (profile & BTRFS_BLOCK_GROUP_RAID6) 294 | return 3; 295 | return -EINVAL; 296 | } 297 | /* Original raid56 recovery wrapper */ 298 | int raid56_recov(int nr_devs, size_t stripe_len, u64 profile, int dest1, 299 | int dest2, void **data) 300 | { 301 | int min_devs; 302 | int ret; 303 | 304 | min_devs = get_raid56_devs_min(profile); 305 | if (min_devs < 0) 306 | return min_devs; 307 | if (nr_devs < min_devs) 308 | return -EINVAL; 309 | 310 | /* Nothing to recover */ 311 | if (dest1 == -1 && dest2 == -1) 312 | return 0; 313 | 314 | /* Reorder dest1/2, so only dest2 can be -1 */ 315 | if (dest1 == -1) { 316 | dest1 = dest2; 317 | dest2 = -1; 318 | } else if (dest2 != -1 && dest1 != -1) { 319 | /* Reorder dest1/2, ensure dest2 > dest1 */ 320 | if (dest1 > dest2) { 321 | int tmp; 322 | 323 | tmp = dest2; 324 | dest2 = dest1; 325 | dest1 = tmp; 326 | } 327 | } 328 | 329 | if (profile & BTRFS_BLOCK_GROUP_RAID5) { 330 | if (dest2 != -1) 331 | return 1; 332 | return raid5_gen_result(nr_devs, stripe_len, dest1, data); 333 | } 334 | 335 | /* RAID6 one dev corrupted case */ 336 | if (dest2 == -1) { 337 | /* Regenerate P/Q */ 338 | if (dest1 == nr_devs - 1 || dest1 == nr_devs - 2) { 339 | raid6_gen_syndrome(nr_devs, stripe_len, data); 340 | return 0; 341 | } 342 | 343 | /* Regenerate data from P */ 344 | return raid5_gen_result(nr_devs - 1, stripe_len, dest1, data); 345 | } 346 | 347 | /* P/Q both corrupted */ 348 | if (dest1 == nr_devs - 2 && dest2 == nr_devs - 1) { 349 | raid6_gen_syndrome(nr_devs, stripe_len, data); 350 | return 0; 351 | } 352 | 353 | /* 2 Data corrupted */ 354 | if (dest2 < nr_devs - 2) 355 | return raid6_recov_data2(nr_devs, stripe_len, dest1, dest2, 356 | data); 357 | /* Data and P */ 358 | if (dest2 == nr_devs - 2) 359 | return raid6_recov_datap(nr_devs,
stripe_len, dest1, data); 360 | 361 | /* 362 | * Final case, Data and Q, recover data first then regenerate Q 363 | */ 364 | ret = raid5_gen_result(nr_devs - 1, stripe_len, dest1, data); 365 | if (ret < 0) 366 | return ret; 367 | raid6_gen_syndrome(nr_devs, stripe_len, data); 368 | return 0; 369 | } 370 | -------------------------------------------------------------------------------- /metadata.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #include <stdlib.h> 4 | #include "metadata.h" 5 | #include "volumes.h" 6 | #include "messages.h" 7 | #include "hash.h" 8 | 9 | void free_extent_buffer(struct extent_buffer *eb) 10 | { 11 | struct btrfs_fs_info *fs_info; 12 | if (!eb) 13 | return; 14 | ASSERT(eb->refs > 0); 15 | 16 | fs_info = eb->fs_info; 17 | pthread_mutex_lock(&fs_info->eb_lock); 18 | eb->refs--; 19 | if (eb->refs == 0) { 20 | rb_erase(&eb->node, &eb->fs_info->eb_root); 21 | free(eb); 22 | } 23 | pthread_mutex_unlock(&fs_info->eb_lock); 24 | } 25 | 26 | 27 | void btrfs_init_path(struct btrfs_path *path) 28 | { 29 | memset(path, 0, sizeof(*path)); 30 | } 31 | 32 | void btrfs_release_path(struct btrfs_path *path) 33 | { 34 | int i; 35 | 36 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { 37 | free_extent_buffer(path->nodes[i]); 38 | path->nodes[i] = NULL; 39 | path->slots[i] = 0; 40 | } 41 | } 42 | 43 | /* Check the sanity of the tree block, before doing the csum check */ 44 | static int verify_tree_block(struct extent_buffer *eb, u8 level, 45 | u64 transid, struct btrfs_key *first_key) 46 | { 47 | if (btrfs_header_bytenr(eb) != eb->start) { 48 | error("tree block %llu bad bytenr, has %llu expect %llu", 49 | eb->start, btrfs_header_bytenr(eb), eb->start); 50 | return -EIO; 51 | } 52 | if (btrfs_header_level(eb) != level) { 53 | error("tree block %llu bad level, has %u expect %u", 54 | eb->start, btrfs_header_level(eb), level); 55 | return -EIO; 56 | } 57 | if (btrfs_header_generation(eb) != transid) { 58 | error("tree block %llu bad transid, has %llu expect %llu", 59 | eb->start, btrfs_header_generation(eb), transid); 60 | return -EIO; 61 | } 62 | if (first_key) { 63 | struct btrfs_key found_key; 64 | 65 | if (btrfs_header_level(eb)) 66 | btrfs_node_key_to_cpu(eb, &found_key, 0); 67 | else 68 | btrfs_item_key_to_cpu(eb, &found_key, 0); 69 | if (btrfs_comp_cpu_keys(first_key, &found_key)) { 70 | error( 71 | "tree block %llu key mismatch, has (%llu %u %llu) want (%llu %u %llu)", 72 | eb->start, found_key.objectid, found_key.type, 73 | found_key.offset, first_key->objectid, 74 | first_key->type, first_key->offset); 75 | return -EIO; 76 | } 77 | } 78 | return 0; 79 | } 80 | 81 | struct extent_buffer *btrfs_read_tree_block(struct btrfs_fs_info *fs_info, 82 | u64 logical, u8 level, u64 transid, 83 | struct btrfs_key *first_key) 84 | { 85 | struct rb_node **p = &fs_info->eb_root.rb_node; 86 | struct rb_node *parent = NULL; 87 | struct extent_buffer *eb; 88 | int mirror_nr; 89 | int max_mirror; 90 | int ret = 0; 91 | 92 | pthread_mutex_lock(&fs_info->eb_lock); 93 | while (*p) { 94 | parent = *p; 95 | eb = rb_entry(parent, struct extent_buffer, node); 96 | if (logical < eb->start) { 97 | p = &(*p)->rb_left; 98 | } else if (logical > eb->start) { 99 | p = &(*p)->rb_right; 100 | } else { 101 | /* 102 | * Even for a cached tree block, we still need to verify 103 | * it in case of bad level/transid/first_key.
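 * For example, a corrupted parent node may reference a bytenr that is already cached from a healthy parent, but with a different expected level or transid; re-running verify_tree_block() catches such bad references.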
104 | */ 105 | ret = verify_tree_block(eb, level, transid, first_key); 106 | if (ret < 0) { 107 | pthread_mutex_unlock(&fs_info->eb_lock); 108 | return ERR_PTR(ret); 109 | } 110 | 111 | eb->refs++; 112 | pthread_mutex_unlock(&fs_info->eb_lock); 113 | return eb; 114 | } 115 | } 116 | 117 | max_mirror = btrfs_num_copies(fs_info, logical); 118 | if (max_mirror < 0) { 119 | pthread_mutex_unlock(&fs_info->eb_lock); 120 | return ERR_PTR(max_mirror); 121 | } 122 | 123 | eb = calloc(1, sizeof(*eb) + fs_info->nodesize); 124 | if (!eb) { 125 | pthread_mutex_unlock(&fs_info->eb_lock); 126 | return ERR_PTR(-ENOMEM); 127 | } 128 | eb->start = logical; 129 | eb->len = fs_info->nodesize; 130 | eb->refs = 0; 131 | eb->fs_info = fs_info; 132 | for (mirror_nr = 1; mirror_nr <= max_mirror; mirror_nr++) { 133 | u8 csum[BTRFS_CSUM_SIZE]; 134 | 135 | ret = btrfs_read_logical(fs_info, eb->data, 136 | fs_info->nodesize, logical, mirror_nr); 137 | /* Btrfs metadata should be read out in one go; the cast also skips read errors (ret < 0). */ 138 | if (ret < (int)fs_info->nodesize) 139 | continue; 140 | ret = verify_tree_block(eb, level, transid, first_key); 141 | if (ret < 0) 142 | continue; 143 | btrfs_csum_data(fs_info->csum_type, 144 | (u8 *)eb->data + BTRFS_CSUM_SIZE, csum, 145 | fs_info->nodesize - BTRFS_CSUM_SIZE); 146 | if (memcmp(csum, eb->data, fs_info->csum_size)) 147 | continue; 148 | /* TODO: Add extra sanity check on the tree block contents */ 149 | eb->refs++; 150 | rb_link_node(&eb->node, parent, p); 151 | rb_insert_color(&eb->node, &fs_info->eb_root); 152 | pthread_mutex_unlock(&fs_info->eb_lock); 153 | return eb; 154 | } 155 | 156 | free(eb); 157 | pthread_mutex_unlock(&fs_info->eb_lock); 158 | return ERR_PTR(-EIO); 159 | } 160 | 161 | /* 162 | * Binary search inside an extent buffer. 163 | * 164 | * Since a btrfs extent buffer stores all its items/nodes sequentially, 165 | * we can do a binary search here.
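 * For example (mirroring search_slot_in_eb() below): in a leaf, the btrfs_item array starts at offsetof(struct btrfs_leaf, items) with a stride of sizeof(struct btrfs_item), so the key of slot mid lives at eb->data + p + mid * item_size; nodes work the same way with btrfs_key_ptr entries.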
166 | */ 167 | static int generic_bin_search(struct extent_buffer *eb, unsigned long p, 168 | int item_size, const struct btrfs_key *key, 169 | int max, int *slot) 170 | { 171 | int low = 0; 172 | int high = max; 173 | int mid; 174 | int ret; 175 | unsigned long offset; 176 | 177 | while (low < high) { 178 | struct btrfs_disk_key *tmp; 179 | struct btrfs_key tmp_cpu_key; 180 | 181 | mid = (low + high) / 2; 182 | offset = p + mid * item_size; 183 | 184 | tmp = (struct btrfs_disk_key *)(eb->data + offset); 185 | btrfs_disk_key_to_cpu(&tmp_cpu_key, tmp); 186 | ret = btrfs_comp_cpu_keys(&tmp_cpu_key, key); 187 | 188 | if (ret < 0) 189 | low = mid + 1; 190 | else if (ret > 0) 191 | high = mid; 192 | else { 193 | *slot = mid; 194 | return 0; 195 | } 196 | } 197 | *slot = low; 198 | return 1; 199 | } 200 | 201 | /* Locate the slot inside the extent buffer */ 202 | static int search_slot_in_eb(struct extent_buffer *eb, 203 | const struct btrfs_key *key, int *slot) 204 | { 205 | if (btrfs_header_level(eb) == 0) 206 | return generic_bin_search(eb, 207 | offsetof(struct btrfs_leaf, items), 208 | sizeof(struct btrfs_item), 209 | key, btrfs_header_nritems(eb), 210 | slot); 211 | else 212 | return generic_bin_search(eb, 213 | offsetof(struct btrfs_node, ptrs), 214 | sizeof(struct btrfs_key_ptr), 215 | key, btrfs_header_nritems(eb), 216 | slot); 217 | } 218 | 219 | static struct extent_buffer *read_node_child(struct extent_buffer *parent, 220 | int slot) 221 | { 222 | struct btrfs_key first_key; 223 | u64 bytenr; 224 | u64 gen; 225 | 226 | ASSERT(btrfs_header_level(parent) > 0); 227 | ASSERT(slot < btrfs_header_nritems(parent)); 228 | 229 | bytenr = btrfs_node_blockptr(parent, slot); 230 | gen = btrfs_node_ptr_generation(parent, slot); 231 | btrfs_node_key_to_cpu(parent, &first_key, slot); 232 | 233 | return btrfs_read_tree_block(parent->fs_info, bytenr, 234 | btrfs_header_level(parent) - 1, gen, &first_key); 235 | } 236 | 237 | int __btrfs_search_slot(struct btrfs_root *root, struct btrfs_path *path, 238 | struct btrfs_key *key) 239 | { 240 | int level; 241 | int ret = 0; 242 | 243 | /* The path must not hold any tree blocks, or we will leak some eb */ 244 | ASSERT(path->nodes[0] == NULL); 245 | level = btrfs_header_level(root->node); 246 | path->nodes[level] = extent_buffer_get(root->node); 247 | 248 | for (; level >= 0; level--) { 249 | int slot; 250 | 251 | ASSERT(path->nodes[level]); 252 | ret = search_slot_in_eb(path->nodes[level], key, &slot); 253 | /* 254 | * For nodes, if we didn't find a match, we should go to the 255 | * previous slot. 256 | * As the current slot has a key value larger than our target, 257 | * continuing the search there will never hit our target, as in this example: 258 | * 259 | * key = (1, 1, 1) 260 | * 261 | * (1, 1, 0) (1, 2, 0) 262 | * / \ 263 | * (1, 1, 0), (1, 1, 1) (1, 2, 0), (1, 2, 1) 264 | * 265 | * In the above example, we should go through the child of (1, 1, 0) 266 | * rather than the returned slot (1, 2, 0). 267 | * Not to mention the returned slot may be unused.
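 * Walking the example: the search in the node returns ret == 1 with the slot pointing at (1, 2, 0), so we step back one slot to (1, 1, 0) and descend into its child, where the leaf search then finds the exact match (1, 1, 1).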
268 | */ 269 | if (level && ret && slot > 0) 270 | slot--; 271 | path->slots[level] = slot; 272 | 273 | /* Now read the node for the next level */ 274 | if (level > 0) { 275 | struct extent_buffer *eb; 276 | 277 | eb = read_node_child(path->nodes[level], slot); 278 | if (IS_ERR(eb)) { 279 | ret = PTR_ERR(eb); 280 | goto error; 281 | } 282 | path->nodes[level - 1] = eb; 283 | } 284 | } 285 | return ret; 286 | error: 287 | btrfs_release_path(path); 288 | return ret; 289 | } 290 | 291 | int btrfs_next_leaf(struct btrfs_path *path) 292 | { 293 | int slot; 294 | int level; 295 | 296 | for (level = 1; level < BTRFS_MAX_LEVEL; level++) { 297 | /* No more parent */ 298 | if (!path->nodes[level]) 299 | return 1; 300 | 301 | slot = path->slots[level] + 1; 302 | /* Parent has next slot, continue to next step */ 303 | if (slot < btrfs_header_nritems(path->nodes[level])) { 304 | path->slots[level] = slot; 305 | break; 306 | } 307 | /* Parent has no next slot, continue to higher level */ 308 | } 309 | if (level >= BTRFS_MAX_LEVEL) 310 | return 1; 311 | 312 | /* Now we're at @slot of @level, descend to the leftmost leaf */ 313 | for (; level; level--) { 314 | struct extent_buffer *eb; 315 | 316 | slot = path->slots[level]; 317 | eb = read_node_child(path->nodes[level], slot); 318 | if (IS_ERR(eb)) { 319 | btrfs_release_path(path); 320 | return PTR_ERR(eb); 321 | } 322 | free_extent_buffer(path->nodes[level - 1]); 323 | path->nodes[level - 1] = eb; 324 | path->slots[level - 1] = 0; 325 | } 326 | return 0; 327 | } 328 | 329 | int btrfs_search_key(struct btrfs_root *root, struct btrfs_path *path, 330 | struct btrfs_key *key) 331 | { 332 | int ret; 333 | 334 | ret = __btrfs_search_slot(root, path, key); 335 | if (ret > 0) 336 | ret = -ENOENT; 337 | if (ret < 0) 338 | btrfs_release_path(path); 339 | return ret; 340 | } 341 | 342 | static int key_in_range(struct btrfs_key *key, 343 | struct btrfs_key_range *range) 344 | { 345 | struct btrfs_key range_key1; 346 | struct btrfs_key range_key2; 347 | 348 | range_key1.objectid = range->objectid; 349 | range_key1.type = range->type_start; 350 | range_key1.offset = range->offset_start; 351 | 352 | range_key2.objectid = range->objectid; 353 | range_key2.type = range->type_end; 354 | range_key2.offset = range->offset_end; 355 | 356 | return (btrfs_comp_cpu_keys(&range_key1, key) <= 0 && 357 | btrfs_comp_cpu_keys(key, &range_key2) <= 0); 358 | } 359 | 360 | int btrfs_search_keys_start(struct btrfs_root *root, struct btrfs_path *path, 361 | struct btrfs_key_range *range) 362 | { 363 | struct btrfs_key key; 364 | int ret; 365 | 366 | key.objectid = range->objectid; 367 | key.type = range->type_start; 368 | key.offset = range->offset_start; 369 | 370 | ret = __btrfs_search_slot(root, path, &key); 371 | /* Either found or error */ 372 | if (ret <= 0) 373 | return ret; 374 | 375 | /* Check if the current slot is in use first */ 376 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 377 | ret = btrfs_next_leaf(path); 378 | if (ret > 0) 379 | ret = -ENOENT; 380 | if (ret < 0) { 381 | btrfs_release_path(path); 382 | return ret; 383 | } 384 | } 385 | 386 | /* Check if the found key is in the target range */ 387 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 388 | if (!key_in_range(&key, range)) { 389 | btrfs_release_path(path); 390 | return -ENOENT; 391 | } 392 | return 0; 393 | } 394 | 395 | int btrfs_search_keys_next(struct btrfs_path *path, 396 | struct btrfs_key_range *range) 397 | { 398 | struct btrfs_key key; 399 | int ret; 400 | 401 |
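	/* Advance to the next item and check it against @range: return 0 while still inside the range, >0 once we walk past it, <0 on error. */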
ASSERT(path->nodes[0]); 402 | 403 | path->slots[0]++; 404 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 405 | ret = btrfs_next_leaf(path); 406 | if (ret) 407 | return ret; 408 | } 409 | 410 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 411 | if (key_in_range(&key, range)) 412 | return 0; 413 | return 1; 414 | } 415 | 416 | static struct btrfs_root *find_cached_subvol_root(struct btrfs_fs_info *fs_info, 417 | u64 rootid) 418 | { 419 | struct rb_node *node = fs_info->subvols_root.rb_node; 420 | struct btrfs_root *root; 421 | 422 | while (node) { 423 | root = rb_entry(node, struct btrfs_root, rb_node); 424 | 425 | if (rootid < root->root_key.objectid) 426 | node = node->rb_left; 427 | else if (rootid > root->root_key.objectid) 428 | node = node->rb_right; 429 | else 430 | return root; 431 | } 432 | return NULL; 433 | } 434 | 435 | static int search_root_item(struct btrfs_fs_info *fs_info, u64 rootid, 436 | struct btrfs_key *found_key, 437 | struct btrfs_root_item *ri) 438 | { 439 | struct btrfs_key_range key_range; 440 | struct btrfs_path path; 441 | int ret; 442 | 443 | /* At this stage, the root tree must be initialized */ 444 | ASSERT(fs_info->tree_root); 445 | 446 | btrfs_init_path(&path); 447 | key_range.objectid = rootid; 448 | key_range.type_start = key_range.type_end = BTRFS_ROOT_ITEM_KEY; 449 | key_range.offset_start = 0; 450 | key_range.offset_end = (u64)-1; 451 | 452 | ret = btrfs_search_keys_start(fs_info->tree_root, &path, &key_range); 453 | if (ret < 0) 454 | return ret; 455 | 456 | memset(ri, 0, sizeof(*ri)); 457 | read_extent_buffer(path.nodes[0], ri, 458 | btrfs_item_ptr_offset(path.nodes[0], path.slots[0]), 459 | btrfs_item_size_nr(path.nodes[0], path.slots[0])); 460 | btrfs_item_key_to_cpu(path.nodes[0], found_key, path.slots[0]); 461 | btrfs_release_path(&path); 462 | return 0; 463 | } 464 | 465 | struct btrfs_root *btrfs_read_root(struct btrfs_fs_info *fs_info, u64 rootid) 466 | { 467 | struct btrfs_super_block *sb = &fs_info->super_copy; 468 | struct btrfs_root *root; 469 | struct btrfs_key root_key = {}; 470 | u64 gen; 471 | u64 bytenr; 472 | u8 level; 473 | int ret; 474 | 475 | /* For non-subvolume trees, return the cached result */ 476 | if (rootid == BTRFS_CHUNK_TREE_OBJECTID && fs_info->chunk_root) 477 | return fs_info->chunk_root; 478 | if (rootid == BTRFS_ROOT_TREE_OBJECTID && fs_info->tree_root) 479 | return fs_info->tree_root; 480 | if (rootid == BTRFS_CSUM_TREE_OBJECTID && fs_info->csum_root) 481 | return fs_info->csum_root; 482 | 483 | root = find_cached_subvol_root(fs_info, rootid); 484 | if (root) 485 | return root; 486 | 487 | root = calloc(1, sizeof(*root)); 488 | if (!root) 489 | return ERR_PTR(-ENOMEM); 490 | 491 | RB_CLEAR_NODE(&root->rb_node); 492 | root->fs_info = fs_info; 493 | 494 | root_key.type = BTRFS_ROOT_ITEM_KEY; 495 | root_key.offset = 0; 496 | /* 497 | * Allocate a new root and read it from disk; we need to grab the info 498 | * for the root tree block. 499 | * 500 | * For the chunk and root trees, the info is grabbed from the superblock; 501 | * all other trees need theirs grabbed from the tree root.
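 * E.g. the chunk root's bytenr/level/generation come from btrfs_super_chunk_root() and friends, while a subvolume root's come from the btrfs_root_item located by search_root_item().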
502 | */ 503 | if (rootid == BTRFS_CHUNK_TREE_OBJECTID) { 504 | gen = btrfs_super_chunk_root_generation(sb); 505 | level = btrfs_super_chunk_root_level(sb); 506 | bytenr = btrfs_super_chunk_root(sb); 507 | root_key.objectid = rootid; 508 | root_key.type = BTRFS_ROOT_ITEM_KEY; 509 | root_key.offset = 0; 510 | } else if (rootid == BTRFS_ROOT_TREE_OBJECTID){ 511 | gen = btrfs_super_generation(sb); 512 | level = btrfs_super_root_level(sb); 513 | bytenr = btrfs_super_root(sb); 514 | root_key.objectid = rootid; 515 | } else { 516 | struct btrfs_root_item ri; 517 | 518 | ret = search_root_item(fs_info, rootid, &root_key, &ri); 519 | if (ret < 0) 520 | return ERR_PTR(ret); 521 | gen = btrfs_root_generation(&ri); 522 | level = btrfs_root_level(&ri); 523 | bytenr = btrfs_root_bytenr(&ri); 524 | root->root_dirid = btrfs_root_dirid(&ri); 525 | } 526 | 527 | memcpy(&root->root_key, &root_key, sizeof(root_key)); 528 | root->node = btrfs_read_tree_block(fs_info, bytenr, level, gen, NULL); 529 | if (IS_ERR(root->node)) { 530 | ret = PTR_ERR(root->node); 531 | free(root); 532 | return ERR_PTR(ret); 533 | } 534 | 535 | /* If it's a subvolume tree, also add it to subvols_root rb tree */ 536 | if (is_fstree(rootid)) { 537 | struct rb_node **p = &fs_info->subvols_root.rb_node; 538 | struct rb_node *parent = NULL; 539 | struct btrfs_root *entry; 540 | 541 | while (*p) { 542 | parent = *p; 543 | entry = rb_entry(parent, struct btrfs_root, rb_node); 544 | 545 | if (rootid < entry->root_key.objectid) { 546 | p = &(*p)->rb_left; 547 | } else if (rootid > entry->root_key.objectid) { 548 | p = &(*p)->rb_right; 549 | } else { 550 | free_extent_buffer(root->node); 551 | free(root); 552 | return ERR_PTR(-EEXIST); 553 | } 554 | } 555 | rb_link_node(&root->rb_node, parent, p); 556 | rb_insert_color(&root->rb_node, &fs_info->subvols_root); 557 | } 558 | return root; 559 | } 560 | -------------------------------------------------------------------------------- /libs/list.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2007 Oracle. All rights reserved. 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public 6 | * License v2 as published by the Free Software Foundation. 7 | * 8 | * This program is distributed in the hope that it will be useful, 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | * General Public License for more details. 12 | * 13 | * You should have received a copy of the GNU General Public 14 | * License along with this program; if not, write to the 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 | * Boston, MA 021110-1307, USA. 17 | */ 18 | 19 | #ifndef _LINUX_LIST_H 20 | #define _LINUX_LIST_H 21 | 22 | #define LIST_POISON1 ((struct list_head *) 0x00100100) 23 | #define LIST_POISON2 ((struct list_head *) 0x00200200) 24 | 25 | /* 26 | * Simple doubly linked list implementation. 27 | * 28 | * Some of the internal functions ("__xxx") are useful when 29 | * manipulating whole lists rather than single entries, as 30 | * sometimes we already know the next/prev entries and we can 31 | * generate better code by using them directly rather than 32 | * using the generic single-entry routines. 
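 * A minimal usage sketch (the struct and names here are illustrative, not part of this header): * *	struct foo { *		int val; *		struct list_head list; *	}; * *	LIST_HEAD(foo_list); *	struct foo *f = malloc(sizeof(*f)); *	f->val = 42; *	list_add_tail(&f->list, &foo_list); *	list_for_each_entry(f, &foo_list, list) *		printf("%d\n", f->val);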
33 | */ 34 | 35 | struct list_head { 36 | struct list_head *next, *prev; 37 | }; 38 | 39 | #define LIST_HEAD_INIT(name) { &(name), &(name) } 40 | 41 | #define LIST_HEAD(name) \ 42 | struct list_head name = LIST_HEAD_INIT(name) 43 | 44 | static inline void INIT_LIST_HEAD(struct list_head *list) 45 | { 46 | list->next = list; 47 | list->prev = list; 48 | } 49 | 50 | /* 51 | * Insert a new entry between two known consecutive entries. 52 | * 53 | * This is only for internal list manipulation where we know 54 | * the prev/next entries already! 55 | */ 56 | #ifndef CONFIG_DEBUG_LIST 57 | static inline void __list_add(struct list_head *xnew, 58 | struct list_head *prev, 59 | struct list_head *next) 60 | { 61 | next->prev = xnew; 62 | xnew->next = next; 63 | xnew->prev = prev; 64 | prev->next = xnew; 65 | } 66 | #else 67 | extern void __list_add(struct list_head *xnew, 68 | struct list_head *prev, 69 | struct list_head *next); 70 | #endif 71 | 72 | /** 73 | * list_add - add a new entry 74 | * @new: new entry to be added 75 | * @head: list head to add it after 76 | * 77 | * Insert a new entry after the specified head. 78 | * This is good for implementing stacks. 79 | */ 80 | #ifndef CONFIG_DEBUG_LIST 81 | static inline void list_add(struct list_head *xnew, struct list_head *head) 82 | { 83 | __list_add(xnew, head, head->next); 84 | } 85 | #else 86 | extern void list_add(struct list_head *xnew, struct list_head *head); 87 | #endif 88 | 89 | 90 | /** 91 | * list_add_tail - add a new entry 92 | * @new: new entry to be added 93 | * @head: list head to add it before 94 | * 95 | * Insert a new entry before the specified head. 96 | * This is useful for implementing queues. 97 | */ 98 | static inline void list_add_tail(struct list_head *xnew, struct list_head *head) 99 | { 100 | __list_add(xnew, head->prev, head); 101 | } 102 | 103 | /* 104 | * Delete a list entry by making the prev/next entries 105 | * point to each other. 106 | * 107 | * This is only for internal list manipulation where we know 108 | * the prev/next entries already! 109 | */ 110 | static inline void __list_del(struct list_head * prev, struct list_head * next) 111 | { 112 | next->prev = prev; 113 | prev->next = next; 114 | } 115 | 116 | /** 117 | * list_del - deletes entry from list. 118 | * @entry: the element to delete from the list. 119 | * Note: list_empty on entry does not return true after this, the entry is 120 | * in an undefined state. 121 | */ 122 | #ifndef CONFIG_DEBUG_LIST 123 | static inline void list_del(struct list_head *entry) 124 | { 125 | __list_del(entry->prev, entry->next); 126 | entry->next = LIST_POISON1; 127 | entry->prev = LIST_POISON2; 128 | } 129 | #else 130 | extern void list_del(struct list_head *entry); 131 | #endif 132 | 133 | /** 134 | * list_replace - replace old entry by new one 135 | * @old : the element to be replaced 136 | * @new : the new element to insert 137 | * Note: if 'old' was empty, it will be overwritten. 138 | */ 139 | static inline void list_replace(struct list_head *old, 140 | struct list_head *xnew) 141 | { 142 | xnew->next = old->next; 143 | xnew->next->prev = xnew; 144 | xnew->prev = old->prev; 145 | xnew->prev->next = xnew; 146 | } 147 | 148 | static inline void list_replace_init(struct list_head *old, 149 | struct list_head *xnew) 150 | { 151 | list_replace(old, xnew); 152 | INIT_LIST_HEAD(old); 153 | } 154 | /** 155 | * list_del_init - deletes entry from list and reinitialize it. 156 | * @entry: the element to delete from the list. 
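 * Unlike list_del(), the entry is re-initialized to point at itself, so a later list_empty() on it returns true and the entry can safely be reused.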
157 | */ 158 | static inline void list_del_init(struct list_head *entry) 159 | { 160 | __list_del(entry->prev, entry->next); 161 | INIT_LIST_HEAD(entry); 162 | } 163 | 164 | /** 165 | * list_move - delete from one list and add as another's head 166 | * @list: the entry to move 167 | * @head: the head that will precede our entry 168 | */ 169 | static inline void list_move(struct list_head *list, struct list_head *head) 170 | { 171 | __list_del(list->prev, list->next); 172 | list_add(list, head); 173 | } 174 | 175 | /** 176 | * list_move_tail - delete from one list and add as another's tail 177 | * @list: the entry to move 178 | * @head: the head that will follow our entry 179 | */ 180 | static inline void list_move_tail(struct list_head *list, 181 | struct list_head *head) 182 | { 183 | __list_del(list->prev, list->next); 184 | list_add_tail(list, head); 185 | } 186 | 187 | /** 188 | * list_is_last - tests whether @list is the last entry in list @head 189 | * @list: the entry to test 190 | * @head: the head of the list 191 | */ 192 | static inline int list_is_last(const struct list_head *list, 193 | const struct list_head *head) 194 | { 195 | return list->next == head; 196 | } 197 | 198 | /** 199 | * list_empty - tests whether a list is empty 200 | * @head: the list to test. 201 | */ 202 | static inline int list_empty(const struct list_head *head) 203 | { 204 | return head->next == head; 205 | } 206 | 207 | /** 208 | * list_empty_careful - tests whether a list is empty and not being modified 209 | * @head: the list to test 210 | * 211 | * Description: 212 | * tests whether a list is empty _and_ checks that no other CPU might be 213 | * in the process of modifying either member (next or prev) 214 | * 215 | * NOTE: using list_empty_careful() without synchronization 216 | * can only be safe if the only activity that can happen 217 | * to the list entry is list_del_init(). Eg. it cannot be used 218 | * if another CPU could re-list_add() it. 219 | */ 220 | static inline int list_empty_careful(const struct list_head *head) 221 | { 222 | struct list_head *next = head->next; 223 | return (next == head) && (next == head->prev); 224 | } 225 | 226 | static inline void __list_splice(const struct list_head *list, 227 | struct list_head *prev, 228 | struct list_head *next) 229 | { 230 | struct list_head *first = list->next; 231 | struct list_head *last = list->prev; 232 | 233 | first->prev = prev; 234 | prev->next = first; 235 | 236 | last->next = next; 237 | next->prev = last; 238 | } 239 | 240 | /** 241 | * list_splice - join two lists 242 | * @list: the new list to add. 243 | * @head: the place to add it in the first list. 244 | */ 245 | static inline void list_splice(struct list_head *list, struct list_head *head) 246 | { 247 | if (!list_empty(list)) 248 | __list_splice(list, head, head->next); 249 | } 250 | 251 | /** 252 | * list_splice_tail - join two lists, each list being a queue 253 | * @list: the new list to add. 254 | * @head: the place to add it in the first list. 255 | */ 256 | static inline void list_splice_tail(struct list_head *list, 257 | struct list_head *head) 258 | { 259 | if (!list_empty(list)) 260 | __list_splice(list, head->prev, head); 261 | } 262 | 263 | /** 264 | * list_splice_init - join two lists and reinitialise the emptied list. 265 | * @list: the new list to add. 266 | * @head: the place to add it in the first list. 
267 | * 268 | * The list at @list is reinitialised 269 | */ 270 | static inline void list_splice_init(struct list_head *list, 271 | struct list_head *head) 272 | { 273 | if (!list_empty(list)) { 274 | __list_splice(list, head, head->next); 275 | INIT_LIST_HEAD(list); 276 | } 277 | } 278 | 279 | /** 280 | * list_splice_tail_init - join two lists and reinitialise the emptied list 281 | * @list: the new list to add. 282 | * @head: the place to add it in the first list. 283 | * 284 | * Each of the lists is a queue. 285 | * The list at @list is reinitialised 286 | */ 287 | static inline void list_splice_tail_init(struct list_head *list, 288 | struct list_head *head) 289 | { 290 | if (!list_empty(list)) { 291 | __list_splice(list, head->prev, head); 292 | INIT_LIST_HEAD(list); 293 | } 294 | } 295 | 296 | /** 297 | * list_entry - get the struct for this entry 298 | * @ptr: the &struct list_head pointer. 299 | * @type: the type of the struct this is embedded in. 300 | * @member: the name of the list_struct within the struct. 301 | */ 302 | #define list_entry(ptr, type, member) \ 303 | container_of(ptr, type, member) 304 | 305 | /** 306 | * list_first_entry - get the first element from a list 307 | * @ptr: the list head to take the element from. 308 | * @type: the type of the struct this is embedded in. 309 | * @member: the name of the list_struct within the struct. 310 | * 311 | * Note, that list is expected to be not empty. 312 | */ 313 | #define list_first_entry(ptr, type, member) \ 314 | list_entry((ptr)->next, type, member) 315 | 316 | /** 317 | * list_next_entry - get the next element from a list 318 | * @ptr: the list head to take the element from. 319 | * @member: the name of the list_struct within the struct. 320 | * 321 | * Note, that next is expected to be not null. 322 | */ 323 | #define list_next_entry(ptr, member) \ 324 | list_entry((ptr)->member.next, typeof(*ptr), member) 325 | 326 | /** 327 | * list_for_each - iterate over a list 328 | * @pos: the &struct list_head to use as a loop cursor. 329 | * @head: the head for your list. 330 | */ 331 | #define list_for_each(pos, head) \ 332 | for (pos = (head)->next; pos != (head); \ 333 | pos = pos->next) 334 | 335 | /** 336 | * __list_for_each - iterate over a list 337 | * @pos: the &struct list_head to use as a loop cursor. 338 | * @head: the head for your list. 339 | * 340 | * This variant differs from list_for_each() in that it's the 341 | * simplest possible list iteration code, no prefetching is done. 342 | * Use this for code that knows the list to be very short (empty 343 | * or 1 entry) most of the time. 344 | */ 345 | #define __list_for_each(pos, head) \ 346 | for (pos = (head)->next; pos != (head); pos = pos->next) 347 | 348 | /** 349 | * list_for_each_prev - iterate over a list backwards 350 | * @pos: the &struct list_head to use as a loop cursor. 351 | * @head: the head for your list. 352 | */ 353 | #define list_for_each_prev(pos, head) \ 354 | for (pos = (head)->prev; pos != (head); \ 355 | pos = pos->prev) 356 | 357 | /** 358 | * list_for_each_safe - iterate over a list safe against removal of list entry 359 | * @pos: the &struct list_head to use as a loop cursor. 360 | * @n: another &struct list_head to use as temporary storage 361 | * @head: the head for your list. 
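 * Example sketch - draining a list while iterating, which plain list_for_each() cannot do safely: * *	struct list_head *pos, *n; *	list_for_each_safe(pos, n, &head) *		list_del(pos);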
362 | */ 363 | #define list_for_each_safe(pos, n, head) \ 364 | for (pos = (head)->next, n = pos->next; pos != (head); \ 365 | pos = n, n = pos->next) 366 | 367 | /** 368 | * list_for_each_entry - iterate over list of given type 369 | * @pos: the type * to use as a loop cursor. 370 | * @head: the head for your list. 371 | * @member: the name of the list_struct within the struct. 372 | */ 373 | #define list_for_each_entry(pos, head, member) \ 374 | for (pos = list_entry((head)->next, typeof(*pos), member); \ 375 | &pos->member != (head); \ 376 | pos = list_entry(pos->member.next, typeof(*pos), member)) 377 | 378 | /** 379 | * list_for_each_entry_reverse - iterate backwards over list of given type. 380 | * @pos: the type * to use as a loop cursor. 381 | * @head: the head for your list. 382 | * @member: the name of the list_struct within the struct. 383 | */ 384 | #define list_for_each_entry_reverse(pos, head, member) \ 385 | for (pos = list_entry((head)->prev, typeof(*pos), member); \ 386 | &pos->member != (head); \ 387 | pos = list_entry(pos->member.prev, typeof(*pos), member)) 388 | 389 | /** 390 | * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue 391 | * @pos: the type * to use as a start point 392 | * @head: the head of the list 393 | * @member: the name of the list_struct within the struct. 394 | * 395 | * Prepares a pos entry for use as a start point in list_for_each_entry_continue. 396 | */ 397 | #define list_prepare_entry(pos, head, member) \ 398 | ((pos) ? : list_entry(head, typeof(*pos), member)) 399 | 400 | /** 401 | * list_for_each_entry_continue - continue iteration over list of given type 402 | * @pos: the type * to use as a loop cursor. 403 | * @head: the head for your list. 404 | * @member: the name of the list_struct within the struct. 405 | * 406 | * Continue to iterate over list of given type, continuing after 407 | * the current position. 408 | */ 409 | #define list_for_each_entry_continue(pos, head, member) \ 410 | for (pos = list_entry(pos->member.next, typeof(*pos), member); \ 411 | &pos->member != (head); \ 412 | pos = list_entry(pos->member.next, typeof(*pos), member)) 413 | 414 | /** 415 | * list_for_each_entry_from - iterate over list of given type from the current point 416 | * @pos: the type * to use as a loop cursor. 417 | * @head: the head for your list. 418 | * @member: the name of the list_struct within the struct. 419 | * 420 | * Iterate over list of given type, continuing from current position. 421 | */ 422 | #define list_for_each_entry_from(pos, head, member) \ 423 | for (; &pos->member != (head); \ 424 | pos = list_entry(pos->member.next, typeof(*pos), member)) 425 | 426 | /** 427 | * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry 428 | * @pos: the type * to use as a loop cursor. 429 | * @n: another type * to use as temporary storage 430 | * @head: the head for your list. 431 | * @member: the name of the list_struct within the struct. 432 | */ 433 | #define list_for_each_entry_safe(pos, n, head, member) \ 434 | for (pos = list_entry((head)->next, typeof(*pos), member), \ 435 | n = list_entry(pos->member.next, typeof(*pos), member); \ 436 | &pos->member != (head); \ 437 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) 438 | 439 | /** 440 | * list_for_each_entry_safe_continue 441 | * @pos: the type * to use as a loop cursor. 442 | * @n: another type * to use as temporary storage 443 | * @head: the head for your list. 
444 | * @member: the name of the list_struct within the struct. 445 | * 446 | * Iterate over list of given type, continuing after current point, 447 | * safe against removal of list entry. 448 | */ 449 | #define list_for_each_entry_safe_continue(pos, n, head, member) \ 450 | for (pos = list_entry(pos->member.next, typeof(*pos), member), \ 451 | n = list_entry(pos->member.next, typeof(*pos), member); \ 452 | &pos->member != (head); \ 453 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) 454 | 455 | /** 456 | * list_for_each_entry_safe_from 457 | * @pos: the type * to use as a loop cursor. 458 | * @n: another type * to use as temporary storage 459 | * @head: the head for your list. 460 | * @member: the name of the list_struct within the struct. 461 | * 462 | * Iterate over list of given type from current point, safe against 463 | * removal of list entry. 464 | */ 465 | #define list_for_each_entry_safe_from(pos, n, head, member) \ 466 | for (n = list_entry(pos->member.next, typeof(*pos), member); \ 467 | &pos->member != (head); \ 468 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) 469 | 470 | /** 471 | * list_for_each_entry_safe_reverse 472 | * @pos: the type * to use as a loop cursor. 473 | * @n: another type * to use as temporary storage 474 | * @head: the head for your list. 475 | * @member: the name of the list_struct within the struct. 476 | * 477 | * Iterate backwards over list of given type, safe against removal 478 | * of list entry. 479 | */ 480 | #define list_for_each_entry_safe_reverse(pos, n, head, member) \ 481 | for (pos = list_entry((head)->prev, typeof(*pos), member), \ 482 | n = list_entry(pos->member.prev, typeof(*pos), member); \ 483 | &pos->member != (head); \ 484 | pos = n, n = list_entry(n->member.prev, typeof(*n), member)) 485 | 486 | #endif 487 | -------------------------------------------------------------------------------- /data.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | #include <stdlib.h> 4 | #include "metadata.h" 5 | #include "volumes.h" 6 | #include "messages.h" 7 | #include "hash.h" 8 | #include "inode.h" 9 | #include "data.h" 10 | #include "compression.h" 11 | 12 | struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_fs_info *fs_info, 13 | struct btrfs_path *path, 14 | u64 bytenr) 15 | { 16 | struct btrfs_key key; 17 | struct btrfs_csum_item *ci; 18 | u32 item_size; 19 | int ret; 20 | 21 | ASSERT(IS_ALIGNED(bytenr, fs_info->sectorsize)); 22 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 23 | key.type = BTRFS_EXTENT_CSUM_KEY; 24 | key.offset = bytenr; 25 | 26 | ret = __btrfs_search_slot(fs_info->csum_root, path, &key); 27 | if (ret < 0) { 28 | btrfs_release_path(path); 29 | return ERR_PTR(ret); 30 | } 31 | 32 | /* The csum we're looking for is at offset 0 of the item */ 33 | if (ret == 0) 34 | return btrfs_item_ptr(path->nodes[0], path->slots[0], 35 | struct btrfs_csum_item); 36 | 37 | /* 38 | * The only time we get slot[0] == 0 without an exact match is when the 39 | * tree only has one leaf, and since we didn't get an exact match, it's 40 | * no longer possible to find a csum item before us. 41 | * 42 | * But we don't want to release @path, as the caller may use @path to 43 | * locate where the next csum starts. 44 | */ 45 | if (path->slots[0] == 0) { 46 | ASSERT(path->nodes[1] == NULL); 47 | return ERR_PTR(-ENOENT); 48 | } 49 | 50 | /* 51 | * Now we don't have an exact match, but we have one previous item, 52 | * which may contain the bytenr.
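 * For example (assuming a 4K sectorsize and CRC32 csums, i.e. csum_size == 4): a csum item at key.offset X with item_size 40 covers 10 sectors, the logical range [X, X + 40 / 4 * 4096); the range check below does exactly this arithmetic.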
53 | */ 54 | path->slots[0]--; 55 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 56 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 57 | 58 | /* 59 | * Current item doesn't cover our bytenr, step forward to next item so 60 | * caller can know where next csum starts. 61 | */ 62 | if (key.offset + item_size / fs_info->csum_size * fs_info->sectorsize <= 63 | bytenr) { 64 | path->slots[0]++; 65 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 66 | ret = btrfs_next_leaf(path); 67 | if (ret < 0) { 68 | btrfs_release_path(path); 69 | return ERR_PTR(ret); 70 | } 71 | } 72 | return ERR_PTR(-ENOENT); 73 | } 74 | 75 | /* Now current item covers the bytenr, adjust the pointer */ 76 | ci = btrfs_item_ptr(path->nodes[0], path->slots[0], 77 | struct btrfs_csum_item); 78 | 79 | ci = (struct btrfs_csum_item *)((char *)ci + 80 | (bytenr - key.offset) / fs_info->sectorsize * 81 | fs_info->csum_size); 82 | return ci; 83 | } 84 | 85 | static inline u32 bytes_to_csum_size(struct btrfs_fs_info *fs_info, u32 bytes) 86 | { 87 | return bytes / fs_info->sectorsize * fs_info->csum_size; 88 | } 89 | 90 | static inline u32 csum_size_to_bytes(struct btrfs_fs_info *fs_info, 91 | u32 csum_size) 92 | { 93 | return csum_size / fs_info->csum_size * fs_info->sectorsize; 94 | } 95 | 96 | /* 97 | * Verify the data checksum. 98 | * 99 | * Return >=0 for how many bytes passed the data checksum. 100 | */ 101 | static u32 check_data_csum(struct btrfs_fs_info *fs_info, 102 | const char *buf, size_t buf_bytes, 103 | const char *csum) 104 | { 105 | u8 result[BTRFS_CSUM_SIZE]; 106 | u32 cur; 107 | 108 | ASSERT(IS_ALIGNED(buf_bytes, fs_info->sectorsize)); 109 | 110 | for (cur = 0; cur < buf_bytes; cur += fs_info->sectorsize) { 111 | btrfs_csum_data(fs_info->csum_type, (u8 *)buf + cur, result, 112 | fs_info->sectorsize); 113 | if (memcmp(result, csum + bytes_to_csum_size(fs_info, cur), 114 | fs_info->csum_size)) 115 | break; 116 | } 117 | return cur; 118 | } 119 | 120 | /* The maximum size that we read from disk for one batch. */ 121 | #define BTRFS_CACHE_SIZE (128 * 1024) 122 | 123 | ssize_t btrfs_read_data(struct btrfs_fs_info *fs_info, char *buf, 124 | size_t num_bytes, u64 logical) 125 | { 126 | struct btrfs_csum_item *ci; 127 | struct btrfs_path path; 128 | struct btrfs_key key; 129 | char *csum_buf; 130 | bool has_csum; 131 | u32 bytes_to_read; 132 | u64 next_range_start; 133 | int ret = 0; 134 | int mirror_nr; 135 | int max_mirror; 136 | 137 | ASSERT(IS_ALIGNED(logical, fs_info->sectorsize) && 138 | IS_ALIGNED(num_bytes, fs_info->sectorsize)); 139 | 140 | num_bytes = MIN(num_bytes, BTRFS_CACHE_SIZE); 141 | 142 | max_mirror = btrfs_num_copies(fs_info, logical); 143 | if (max_mirror < 0) 144 | return max_mirror; 145 | 146 | btrfs_init_path(&path); 147 | ci = btrfs_lookup_csum(fs_info, &path, logical); 148 | if (IS_ERR(ci)) { 149 | has_csum = false; 150 | ret = PTR_ERR(ci); 151 | /* 152 | * We may still have path pointing to the next item, get the 153 | * start bytenr of the next item, so we know how many bytes 154 | * don't have csum. 
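 * E.g. when reading [X, X + 128K) while csums only exist from X + 64K on, next_range_start is X + 64K and this pass reads just the csum-less first 64K, leaving the csummed part for the caller's next iteration.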
155 | */ 156 | if (ret == -ENOENT && path.nodes[0] && 157 | path.slots[0] < btrfs_header_nritems(path.nodes[0])) { 158 | btrfs_item_key_to_cpu(path.nodes[0], &key, 159 | path.slots[0]); 160 | next_range_start = key.offset; 161 | } else { 162 | next_range_start = logical + num_bytes; 163 | } 164 | csum_buf = NULL; 165 | bytes_to_read = MIN(next_range_start, logical + num_bytes) - 166 | logical; 167 | } else { 168 | u32 item_size; 169 | 170 | has_csum = true; 171 | /* 172 | * We got a csum item covering the starting bytenr, thus 173 | * @next_range_start should be the end of the csum item. 174 | */ 175 | btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); 176 | item_size = btrfs_item_size_nr(path.nodes[0], path.slots[0]); 177 | 178 | next_range_start = csum_size_to_bytes(fs_info, item_size) + 179 | key.offset; 180 | bytes_to_read = MIN(next_range_start, logical + num_bytes) - 181 | logical; 182 | csum_buf = malloc(bytes_to_csum_size(fs_info, bytes_to_read)); 183 | if (!csum_buf) { 184 | btrfs_release_path(&path); 185 | return -ENOMEM; 186 | } 187 | read_extent_buffer(path.nodes[0], csum_buf, (unsigned long)ci, 188 | bytes_to_csum_size(fs_info, bytes_to_read)); 189 | } 190 | btrfs_release_path(&path); 191 | 192 | /* 193 | * Now we have @has_csum, @csum_buf and @bytes_to_read set up, 194 | * so we can read the data from disk. 195 | */ 196 | for (mirror_nr = 1; mirror_nr <= max_mirror; mirror_nr++) { 197 | u32 bytes_csum_ok; 198 | 199 | ret = btrfs_read_logical(fs_info, buf, bytes_to_read, logical, 200 | mirror_nr); 201 | /* Read completely failed, mostly a missing dev; go to the next copy */ 202 | if (ret < 0) 203 | continue; 204 | if (has_csum) 205 | bytes_csum_ok = check_data_csum(fs_info, buf, ret, 206 | csum_buf); 207 | else 208 | bytes_csum_ok = ret; 209 | /* Got some csum match, return the read bytes */ 210 | if (bytes_csum_ok > 0) { 211 | ret = bytes_csum_ok; 212 | break; 213 | } else { 214 | warning( 215 | "checksum mismatch for logical bytenr %llu mirror %d", 216 | logical, mirror_nr); 217 | ret = -EIO; 218 | } 219 | } 220 | free(csum_buf); 221 | return ret; 222 | } 223 | 224 | /* 225 | * Lookup the file extent for file_offset 226 | * 227 | * Return 0 if we find a file extent which covers @file_offset, and @path 228 | * will point to it. 229 | * 230 | * Return >0 if we can't find a file extent, and @next_file_offset_ret 231 | * will be updated to indicate the next file offset where we can find the next 232 | * file extent. This behavior can be very handy for NO_HOLES cases to skip 233 | * to the next non-hole extent. 234 | * 235 | * Return <0 for error. 236 | */ 237 | static int lookup_file_extent(struct btrfs_fs_info *fs_info, 238 | struct btrfs_path *path, 239 | struct btrfs_inode *inode, u64 file_offset, 240 | u64 *next_file_offset_ret) 241 | { 242 | struct btrfs_file_extent_item *fi; 243 | struct btrfs_key key; 244 | u64 next_offset = (u64)-1; 245 | u64 extent_len; 246 | u8 type; 247 | int ret; 248 | 249 | ASSERT(IS_ALIGNED(file_offset, fs_info->sectorsize)); 250 | key.objectid = inode->ino; 251 | key.type = BTRFS_EXTENT_DATA_KEY; 252 | key.offset = file_offset; 253 | 254 | ret = __btrfs_search_slot(inode->root, path, &key); 255 | /* Either we found an exact match or hit an error */ 256 | if (ret <= 0) 257 | return ret; 258 | 259 | /* 260 | * Check btrfs_lookup_csum() for the reason why the path->slots[0] == 0 261 | * case means no match at all. 262 | */ 263 | if (path->slots[0] == 0) 264 | goto not_found; 265 | 266 | /* Check if previous item covers @file_offset.
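 * E.g. a file extent keyed (ino, EXTENT_DATA, 0) with num_bytes 16K still covers file_offset 8K even though the binary search landed on the following key.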
*/ 267 | path->slots[0]--; 268 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 269 | 270 | /* Previous item doesn't even belong to this inode, not found */ 271 | if (key.objectid != inode->ino) 272 | goto not_found; 273 | 274 | /* 275 | * Previous item is not a file extent but belongs to the same inode; 276 | * this means we may be before the first file extent, and still need to 277 | * check the next item. 278 | */ 279 | if (key.type != BTRFS_EXTENT_DATA_KEY) 280 | goto next_item; 281 | 282 | /* Now we're at the previous file extent, which belongs to this inode */ 283 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 284 | struct btrfs_file_extent_item); 285 | 286 | type = btrfs_file_extent_type(path->nodes[0], fi); 287 | if (type == BTRFS_FILE_EXTENT_INLINE && key.offset != 0) { 288 | error("unexpected inline extent at inode %llu file offset %llu", 289 | inode->ino, key.offset); 290 | btrfs_release_path(path); 291 | return -EUCLEAN; 292 | } 293 | if (type == BTRFS_FILE_EXTENT_INLINE) 294 | extent_len = fs_info->sectorsize; 295 | else 296 | extent_len = btrfs_file_extent_num_bytes(path->nodes[0], fi); 297 | 298 | /* The extent covers the range, found */ 299 | if (key.offset + extent_len > file_offset) 300 | return 0; 301 | 302 | next_item: 303 | /* Not found, go to the next slot to grab the next file_offset */ 304 | path->slots[0]++; 305 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 306 | ret = btrfs_next_leaf(path); 307 | if (ret) 308 | goto not_found; 309 | } 310 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 311 | if (key.objectid != inode->ino || key.type != BTRFS_EXTENT_DATA_KEY) 312 | goto not_found; 313 | next_offset = key.offset; 314 | 315 | not_found: 316 | if (next_file_offset_ret) 317 | *next_file_offset_ret = next_offset; 318 | btrfs_release_path(path); 319 | return 1; 320 | } 321 | 322 | static ssize_t read_compressed_inline(struct btrfs_fs_info *fs_info, 323 | struct btrfs_path *path, 324 | struct btrfs_file_extent_item *fi, 325 | char *buf) 326 | { 327 | u32 csize = btrfs_file_extent_inline_item_len(path->nodes[0], 328 | btrfs_item_nr(path->slots[0])); 329 | u32 dsize = btrfs_file_extent_ram_bytes(path->nodes[0], fi); 330 | u8 compression = btrfs_file_extent_compression(path->nodes[0], fi); 331 | char *cbuf; 332 | int ret; 333 | 334 | ASSERT(dsize <= fs_info->sectorsize); 335 | 336 | cbuf = malloc(csize); 337 | if (!cbuf) 338 | return -ENOMEM; 339 | 340 | read_extent_buffer(path->nodes[0], cbuf, 341 | btrfs_file_extent_inline_start(fi), csize); 342 | 343 | ret = btrfs_decompress(fs_info, cbuf, csize, buf, 344 | dsize, compression); 345 | free(cbuf); memset(buf + dsize, 0, fs_info->sectorsize - dsize); 346 | if (ret < 0) 347 | return ret; 348 | return fs_info->sectorsize; 349 | } 350 | 351 | static ssize_t read_compressed_file_extent(struct btrfs_fs_info *fs_info, 352 | struct btrfs_path *path, 353 | struct btrfs_inode *inode, 354 | u64 file_offset, char *buf, 355 | u32 num_bytes) 356 | { 357 | struct btrfs_file_extent_item *fi; 358 | struct btrfs_key key; 359 | char *cbuf; /* Compressed data buffer */ 360 | char *dbuf; /* Uncompressed data buffer */ 361 | u64 csize; /* Compressed data size */ 362 | u64 dsize; /* Uncompressed data size */ 363 | u64 disk_bytenr; 364 | u64 fi_offset; 365 | u64 fi_num_bytes; 366 | u32 cur_off = 0; 367 | u8 compress; 368 | u8 type; 369 | int ret; 370 | 371 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 372 | struct btrfs_file_extent_item); 373 | type = btrfs_file_extent_type(path->nodes[0], fi); 374 | compress =
btrfs_file_extent_compression(path->nodes[0], fi); 375 | 376 | /* Prealloc is never compressed */ 377 | ASSERT(type == BTRFS_FILE_EXTENT_INLINE || 378 | type == BTRFS_FILE_EXTENT_REG); 379 | 380 | if (type == BTRFS_FILE_EXTENT_INLINE) { 381 | ASSERT(file_offset == 0); 382 | return read_compressed_inline(fs_info, path, fi, buf); 383 | } 384 | 385 | /* Regular compressed extent */ 386 | csize = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi); 387 | dsize = btrfs_file_extent_ram_bytes(path->nodes[0], fi); 388 | disk_bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi); 389 | 390 | /* A hole extent should never be compressed */ 391 | ASSERT(disk_bytenr); 392 | 393 | cbuf = malloc(csize); 394 | dbuf = malloc(dsize); 395 | if (!cbuf || !dbuf) { 396 | free(dbuf); 397 | free(cbuf); 398 | return -ENOMEM; 399 | } 400 | 401 | /* Read compressed data */ 402 | while (cur_off < csize) { 403 | ret = btrfs_read_data(fs_info, cbuf + cur_off, csize - cur_off, 404 | disk_bytenr + cur_off); 405 | if (ret < 0) 406 | goto out; 407 | cur_off += ret; 408 | } 409 | 410 | ret = btrfs_decompress(fs_info, cbuf, csize, dbuf, dsize, compress); 411 | if (ret < 0) 412 | goto out; 413 | 414 | /* Now copy the part the file extent item refers to */ 415 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 416 | fi_offset = btrfs_file_extent_offset(path->nodes[0], fi); 417 | fi_num_bytes = btrfs_file_extent_num_bytes(path->nodes[0], fi); 418 | ret = MIN(file_offset + num_bytes, key.offset + fi_num_bytes) - file_offset; 419 | memcpy(buf, dbuf + (file_offset - key.offset + fi_offset), ret); 420 | 421 | out: 422 | free(cbuf); 423 | free(dbuf); 424 | return ret; 425 | } 426 | 427 | /* Read a file extent specified by @path into @buf. */ 428 | static ssize_t read_file_extent(struct btrfs_fs_info *fs_info, 429 | struct btrfs_path *path, 430 | struct btrfs_inode *inode, u64 file_offset, 431 | char *buf, u32 num_bytes) 432 | { 433 | struct btrfs_file_extent_item *fi; 434 | struct btrfs_key key; 435 | u64 disk_bytenr; 436 | u64 nr_bytes; 437 | u32 read_bytes; 438 | u32 cur_off = 0; 439 | u8 type; 440 | int ret; 441 | 442 | ASSERT(path->nodes[0]); 443 | ASSERT(path->slots[0] < btrfs_header_nritems(path->nodes[0])); 444 | ASSERT(IS_ALIGNED(file_offset, fs_info->sectorsize) && 445 | IS_ALIGNED(num_bytes, fs_info->sectorsize)); 446 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 447 | 448 | ASSERT(key.objectid == inode->ino && key.type == BTRFS_EXTENT_DATA_KEY); 449 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 450 | struct btrfs_file_extent_item); 451 | type = btrfs_file_extent_type(path->nodes[0], fi); 452 | 453 | if (btrfs_file_extent_compression(path->nodes[0], fi) != 454 | BTRFS_COMPRESS_NONE) 455 | return read_compressed_file_extent(fs_info, path, inode, 456 | file_offset, buf, num_bytes); 457 | 458 | if (type == BTRFS_FILE_EXTENT_INLINE) { 459 | read_bytes = btrfs_file_extent_ram_bytes(path->nodes[0], fi); 460 | ASSERT(file_offset == 0 && read_bytes <= fs_info->sectorsize); 461 | read_extent_buffer(path->nodes[0], buf, 462 | btrfs_file_extent_inline_start(fi), read_bytes); 463 | memset(buf + read_bytes, 0, fs_info->sectorsize - read_bytes); 464 | return fs_info->sectorsize; 465 | } 466 | 467 | nr_bytes = btrfs_file_extent_num_bytes(path->nodes[0], fi); 468 | 469 | /* Clamp in u64 first so a file_offset beyond 4G cannot truncate */ 470 | read_bytes = MIN(key.offset + nr_bytes - file_offset, (u64)BTRFS_CACHE_SIZE); 471 | read_bytes = MIN(read_bytes, num_bytes); 472 | 473 | if (type == BTRFS_FILE_EXTENT_PREALLOC) { 474 |
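/* Preallocated extents contain no written data yet, so they read back as zeroes, just like the hole case below. */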
memset(buf, 0, read_bytes); 475 | return read_bytes; 476 | } 477 | /* A hole extent */ 478 | if (btrfs_file_extent_disk_bytenr(path->nodes[0], fi) == 0) { 479 | memset(buf, 0, read_bytes); 480 | return read_bytes; 481 | } 482 | 483 | /* Regular type */ 484 | disk_bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi) + 485 | btrfs_file_extent_offset(path->nodes[0], fi) + 486 | file_offset - key.offset; 487 | while (cur_off < read_bytes) { 488 | ret = btrfs_read_data(fs_info, buf + cur_off, read_bytes - cur_off, 489 | disk_bytenr + cur_off); 490 | if (ret < 0) 491 | break; 492 | cur_off += ret; 493 | } 494 | if (ret < 0 && cur_off == 0) 495 | return ret; 496 | return cur_off; 497 | } 498 | 499 | ssize_t btrfs_read_file(struct btrfs_fs_info *fs_info, 500 | struct btrfs_inode *inode, u64 file_offset, 501 | char *buf, u32 num_bytes) 502 | { 503 | struct btrfs_path path; 504 | u32 cur_off = 0; 505 | int ret; 506 | 507 | ASSERT(IS_ALIGNED(file_offset, fs_info->sectorsize) && 508 | IS_ALIGNED(num_bytes, fs_info->sectorsize)); 509 | btrfs_init_path(&path); 510 | 511 | while (cur_off < num_bytes) { 512 | u64 next_offset; 513 | 514 | btrfs_release_path(&path); 515 | ret = lookup_file_extent(fs_info, &path, inode, 516 | file_offset + cur_off, &next_offset); 517 | if (ret < 0) 518 | goto out; 519 | /* No file extent found, mostly NO_HOLES case */ 520 | if (ret > 0) { 521 | u32 read_bytes; 522 | 523 | read_bytes = MIN(next_offset - file_offset, num_bytes) - 524 | cur_off; 525 | memset(buf + cur_off, 0, read_bytes); 526 | cur_off += read_bytes; 527 | continue; 528 | } 529 | 530 | ret = read_file_extent(fs_info, &path, inode, 531 | file_offset + cur_off, buf + cur_off, 532 | num_bytes - cur_off); 533 | if (ret < 0) 534 | break; 535 | cur_off += ret; 536 | } 537 | out: 538 | btrfs_release_path(&path); 539 | if (ret < 0 && cur_off == 0) 540 | return ret; 541 | return cur_off; 542 | } 543 | -------------------------------------------------------------------------------- /libs/rbtree.c: -------------------------------------------------------------------------------- 1 | /* 2 | Red Black Trees 3 | (C) 1999 Andrea Arcangeli 4 | (C) 2002 David Woodhouse 5 | (C) 2012 Michel Lespinasse 6 | 7 | This program is free software; you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation; either version 2 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program; if not, write to the Free Software 19 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | 21 | linux/lib/rbtree.c 22 | */ 23 | 24 | #include 25 | #include "libs/rbtree_augmented.h" 26 | 27 | /* 28 | * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree 29 | * 30 | * 1) A node is either red or black 31 | * 2) The root is black 32 | * 3) All leaves (NULL) are black 33 | * 4) Both children of every red node are black 34 | * 5) Every simple path from root to leaves contains the same number 35 | * of black nodes. 
36 | * 37 | * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two 38 | * consecutive red nodes in a path and every red node is therefore followed by 39 | * a black. So if B is the number of black nodes on every simple path (as per 40 | * 5), then the longest possible path due to 4 is 2B. 41 | * 42 | * We shall indicate color with case, where black nodes are uppercase and red 43 | * nodes will be lowercase. Unknown color nodes shall be drawn as red within 44 | * parentheses and have some accompanying text comment. 45 | */ 46 | 47 | static inline void rb_set_black(struct rb_node *rb) 48 | { 49 | rb->__rb_parent_color |= RB_BLACK; 50 | } 51 | 52 | static inline struct rb_node *rb_red_parent(struct rb_node *red) 53 | { 54 | return (struct rb_node *)red->__rb_parent_color; 55 | } 56 | 57 | /* 58 | * Helper function for rotations: 59 | * - old's parent and color get assigned to new 60 | * - old gets assigned new as a parent and 'color' as a color. 61 | */ 62 | static inline void 63 | __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, 64 | struct rb_root *root, int color) 65 | { 66 | struct rb_node *parent = rb_parent(old); 67 | new->__rb_parent_color = old->__rb_parent_color; 68 | rb_set_parent_color(old, new, color); 69 | __rb_change_child(old, new, parent, root); 70 | } 71 | 72 | static void 73 | __rb_insert(struct rb_node *node, struct rb_root *root, 74 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) 75 | { 76 | struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; 77 | 78 | while (true) { 79 | /* 80 | * Loop invariant: node is red 81 | * 82 | * If there is a black parent, we are done. 83 | * Otherwise, take some corrective action as we don't 84 | * want a red root or two consecutive red nodes. 85 | */ 86 | if (!parent) { 87 | rb_set_parent_color(node, NULL, RB_BLACK); 88 | break; 89 | } else if (rb_is_black(parent)) 90 | break; 91 | 92 | gparent = rb_red_parent(parent); 93 | 94 | tmp = gparent->rb_right; 95 | if (parent != tmp) { /* parent == gparent->rb_left */ 96 | if (tmp && rb_is_red(tmp)) { 97 | /* 98 | * Case 1 - color flips 99 | * 100 | * G g 101 | * / \ / \ 102 | * p u --> P U 103 | * / / 104 | * n n 105 | * 106 | * However, since g's parent might be red, and 107 | * 4) does not allow this, we need to recurse 108 | * at g. 109 | */ 110 | rb_set_parent_color(tmp, gparent, RB_BLACK); 111 | rb_set_parent_color(parent, gparent, RB_BLACK); 112 | node = gparent; 113 | parent = rb_parent(node); 114 | rb_set_parent_color(node, parent, RB_RED); 115 | continue; 116 | } 117 | 118 | tmp = parent->rb_right; 119 | if (node == tmp) { 120 | /* 121 | * Case 2 - left rotate at parent 122 | * 123 | * G G 124 | * / \ / \ 125 | * p U --> n U 126 | * \ / 127 | * n p 128 | * 129 | * This still leaves us in violation of 4), the 130 | * continuation into Case 3 will fix that. 
131 | */ 132 | parent->rb_right = tmp = node->rb_left; 133 | node->rb_left = parent; 134 | if (tmp) 135 | rb_set_parent_color(tmp, parent, 136 | RB_BLACK); 137 | rb_set_parent_color(parent, node, RB_RED); 138 | augment_rotate(parent, node); 139 | parent = node; 140 | tmp = node->rb_right; 141 | } 142 | 143 | /* 144 | * Case 3 - right rotate at gparent 145 | * 146 | * G P 147 | * / \ / \ 148 | * p U --> n g 149 | * / \ 150 | * n U 151 | */ 152 | gparent->rb_left = tmp; /* == parent->rb_right */ 153 | parent->rb_right = gparent; 154 | if (tmp) 155 | rb_set_parent_color(tmp, gparent, RB_BLACK); 156 | __rb_rotate_set_parents(gparent, parent, root, RB_RED); 157 | augment_rotate(gparent, parent); 158 | break; 159 | } else { 160 | tmp = gparent->rb_left; 161 | if (tmp && rb_is_red(tmp)) { 162 | /* Case 1 - color flips */ 163 | rb_set_parent_color(tmp, gparent, RB_BLACK); 164 | rb_set_parent_color(parent, gparent, RB_BLACK); 165 | node = gparent; 166 | parent = rb_parent(node); 167 | rb_set_parent_color(node, parent, RB_RED); 168 | continue; 169 | } 170 | 171 | tmp = parent->rb_left; 172 | if (node == tmp) { 173 | /* Case 2 - right rotate at parent */ 174 | parent->rb_left = tmp = node->rb_right; 175 | node->rb_right = parent; 176 | if (tmp) 177 | rb_set_parent_color(tmp, parent, 178 | RB_BLACK); 179 | rb_set_parent_color(parent, node, RB_RED); 180 | augment_rotate(parent, node); 181 | parent = node; 182 | tmp = node->rb_left; 183 | } 184 | 185 | /* Case 3 - left rotate at gparent */ 186 | gparent->rb_right = tmp; /* == parent->rb_left */ 187 | parent->rb_left = gparent; 188 | if (tmp) 189 | rb_set_parent_color(tmp, gparent, RB_BLACK); 190 | __rb_rotate_set_parents(gparent, parent, root, RB_RED); 191 | augment_rotate(gparent, parent); 192 | break; 193 | } 194 | } 195 | } 196 | 197 | /* 198 | * Inline version for rb_erase() use - we want to be able to inline 199 | * and eliminate the dummy_rotate callback there 200 | */ 201 | static void 202 | ____rb_erase_color(struct rb_node *parent, struct rb_root *root, 203 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) 204 | { 205 | struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; 206 | 207 | while (true) { 208 | /* 209 | * Loop invariants: 210 | * - node is black (or NULL on first iteration) 211 | * - node is not the root (parent is not NULL) 212 | * - All leaf paths going through parent and node have a 213 | * black node count that is 1 lower than other leaf paths. 214 | */ 215 | sibling = parent->rb_right; 216 | if (node != sibling) { /* node == parent->rb_left */ 217 | if (rb_is_red(sibling)) { 218 | /* 219 | * Case 1 - left rotate at parent 220 | * 221 | * P S 222 | * / \ / \ 223 | * N s --> p Sr 224 | * / \ / \ 225 | * Sl Sr N Sl 226 | */ 227 | parent->rb_right = tmp1 = sibling->rb_left; 228 | sibling->rb_left = parent; 229 | rb_set_parent_color(tmp1, parent, RB_BLACK); 230 | __rb_rotate_set_parents(parent, sibling, root, 231 | RB_RED); 232 | augment_rotate(parent, sibling); 233 | sibling = tmp1; 234 | } 235 | tmp1 = sibling->rb_right; 236 | if (!tmp1 || rb_is_black(tmp1)) { 237 | tmp2 = sibling->rb_left; 238 | if (!tmp2 || rb_is_black(tmp2)) { 239 | /* 240 | * Case 2 - sibling color flip 241 | * (p could be either color here) 242 | * 243 | * (p) (p) 244 | * / \ / \ 245 | * N S --> N s 246 | * / \ / \ 247 | * Sl Sr Sl Sr 248 | * 249 | * This leaves us violating 5) which 250 | * can be fixed by flipping p to black 251 | * if it was red, or by recursing at p. 252 | * p is red when coming from Case 1. 
253 | */ 254 | rb_set_parent_color(sibling, parent, 255 | RB_RED); 256 | if (rb_is_red(parent)) 257 | rb_set_black(parent); 258 | else { 259 | node = parent; 260 | parent = rb_parent(node); 261 | if (parent) 262 | continue; 263 | } 264 | break; 265 | } 266 | /* 267 | * Case 3 - right rotate at sibling 268 | * (p could be either color here) 269 | * 270 | * (p) (p) 271 | * / \ / \ 272 | * N S --> N Sl 273 | * / \ \ 274 | * sl Sr s 275 | * \ 276 | * Sr 277 | */ 278 | sibling->rb_left = tmp1 = tmp2->rb_right; 279 | tmp2->rb_right = sibling; 280 | parent->rb_right = tmp2; 281 | if (tmp1) 282 | rb_set_parent_color(tmp1, sibling, 283 | RB_BLACK); 284 | augment_rotate(sibling, tmp2); 285 | tmp1 = sibling; 286 | sibling = tmp2; 287 | } 288 | /* 289 | * Case 4 - left rotate at parent + color flips 290 | * (p and sl could be either color here. 291 | * After rotation, p becomes black, s acquires 292 | * p's color, and sl keeps its color) 293 | * 294 | * (p) (s) 295 | * / \ / \ 296 | * N S --> P Sr 297 | * / \ / \ 298 | * (sl) sr N (sl) 299 | */ 300 | parent->rb_right = tmp2 = sibling->rb_left; 301 | sibling->rb_left = parent; 302 | rb_set_parent_color(tmp1, sibling, RB_BLACK); 303 | if (tmp2) 304 | rb_set_parent(tmp2, parent); 305 | __rb_rotate_set_parents(parent, sibling, root, 306 | RB_BLACK); 307 | augment_rotate(parent, sibling); 308 | break; 309 | } else { 310 | sibling = parent->rb_left; 311 | if (rb_is_red(sibling)) { 312 | /* Case 1 - right rotate at parent */ 313 | parent->rb_left = tmp1 = sibling->rb_right; 314 | sibling->rb_right = parent; 315 | rb_set_parent_color(tmp1, parent, RB_BLACK); 316 | __rb_rotate_set_parents(parent, sibling, root, 317 | RB_RED); 318 | augment_rotate(parent, sibling); 319 | sibling = tmp1; 320 | } 321 | tmp1 = sibling->rb_left; 322 | if (!tmp1 || rb_is_black(tmp1)) { 323 | tmp2 = sibling->rb_right; 324 | if (!tmp2 || rb_is_black(tmp2)) { 325 | /* Case 2 - sibling color flip */ 326 | rb_set_parent_color(sibling, parent, 327 | RB_RED); 328 | if (rb_is_red(parent)) 329 | rb_set_black(parent); 330 | else { 331 | node = parent; 332 | parent = rb_parent(node); 333 | if (parent) 334 | continue; 335 | } 336 | break; 337 | } 338 | /* Case 3 - right rotate at sibling */ 339 | sibling->rb_right = tmp1 = tmp2->rb_left; 340 | tmp2->rb_left = sibling; 341 | parent->rb_left = tmp2; 342 | if (tmp1) 343 | rb_set_parent_color(tmp1, sibling, 344 | RB_BLACK); 345 | augment_rotate(sibling, tmp2); 346 | tmp1 = sibling; 347 | sibling = tmp2; 348 | } 349 | /* Case 4 - left rotate at parent + color flips */ 350 | parent->rb_left = tmp2 = sibling->rb_right; 351 | sibling->rb_right = parent; 352 | rb_set_parent_color(tmp1, sibling, RB_BLACK); 353 | if (tmp2) 354 | rb_set_parent(tmp2, parent); 355 | __rb_rotate_set_parents(parent, sibling, root, 356 | RB_BLACK); 357 | augment_rotate(parent, sibling); 358 | break; 359 | } 360 | } 361 | } 362 | 363 | /* Non-inline version for rb_erase_augmented() use */ 364 | void __rb_erase_color(struct rb_node *parent, struct rb_root *root, 365 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) 366 | { 367 | ____rb_erase_color(parent, root, augment_rotate); 368 | } 369 | 370 | /* 371 | * Non-augmented rbtree manipulation functions. 372 | * 373 | * We use dummy augmented callbacks here, and have the compiler optimize them 374 | * out of the rb_insert_color() and rb_erase() function definitions. 
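 *
 * E.g. rb_insert_color() below passes dummy_rotate, an empty static inline
 * function, so after inlining the augment hooks cost nothing.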
375 | */ 376 | 377 | static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} 378 | static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} 379 | static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} 380 | 381 | static const struct rb_augment_callbacks dummy_callbacks = { 382 | dummy_propagate, dummy_copy, dummy_rotate 383 | }; 384 | 385 | void rb_insert_color(struct rb_node *node, struct rb_root *root) 386 | { 387 | __rb_insert(node, root, dummy_rotate); 388 | } 389 | 390 | void rb_erase(struct rb_node *node, struct rb_root *root) 391 | { 392 | struct rb_node *rebalance; 393 | rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); 394 | if (rebalance) 395 | ____rb_erase_color(rebalance, root, dummy_rotate); 396 | } 397 | 398 | /* 399 | * Augmented rbtree manipulation functions. 400 | * 401 | * This instantiates the same functions as in the non-augmented 402 | * case, but this time with user-defined callbacks. 403 | */ 404 | 405 | void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, 406 | void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) 407 | { 408 | __rb_insert(node, root, augment_rotate); 409 | } 410 | 411 | /* 412 | * This function returns the first node (in sort order) of the tree. 413 | */ 414 | struct rb_node *rb_first(const struct rb_root *root) 415 | { 416 | struct rb_node *n; 417 | 418 | n = root->rb_node; 419 | if (!n) 420 | return NULL; 421 | while (n->rb_left) 422 | n = n->rb_left; 423 | return n; 424 | } 425 | 426 | struct rb_node *rb_last(const struct rb_root *root) 427 | { 428 | struct rb_node *n; 429 | 430 | n = root->rb_node; 431 | if (!n) 432 | return NULL; 433 | while (n->rb_right) 434 | n = n->rb_right; 435 | return n; 436 | } 437 | 438 | struct rb_node *rb_next(const struct rb_node *node) 439 | { 440 | struct rb_node *parent; 441 | 442 | if (RB_EMPTY_NODE(node)) 443 | return NULL; 444 | 445 | /* 446 | * If we have a right-hand child, go down and then left as far 447 | * as we can. 448 | */ 449 | if (node->rb_right) { 450 | node = node->rb_right; 451 | while (node->rb_left) 452 | node=node->rb_left; 453 | return (struct rb_node *)node; 454 | } 455 | 456 | /* 457 | * No right-hand children. Everything down and left is smaller than us, 458 | * so any 'next' node must be in the general direction of our parent. 459 | * Go up the tree; any time the ancestor is a right-hand child of its 460 | * parent, keep going up. First time it's a left-hand child of its 461 | * parent, said parent is our 'next' node. 462 | */ 463 | while ((parent = rb_parent(node)) && node == parent->rb_right) 464 | node = parent; 465 | 466 | return parent; 467 | } 468 | 469 | struct rb_node *rb_prev(const struct rb_node *node) 470 | { 471 | struct rb_node *parent; 472 | 473 | if (RB_EMPTY_NODE(node)) 474 | return NULL; 475 | 476 | /* 477 | * If we have a left-hand child, go down and then right as far 478 | * as we can. 479 | */ 480 | if (node->rb_left) { 481 | node = node->rb_left; 482 | while (node->rb_right) 483 | node=node->rb_right; 484 | return (struct rb_node *)node; 485 | } 486 | 487 | /* 488 | * No left-hand children. Go up till we find an ancestor which 489 | * is a right-hand child of its parent. 
490 | 	 */
491 | 	while ((parent = rb_parent(node)) && node == parent->rb_left)
492 | 		node = parent;
493 | 
494 | 	return parent;
495 | }
496 | 
497 | void rb_replace_node(struct rb_node *victim, struct rb_node *new,
498 | 		     struct rb_root *root)
499 | {
500 | 	struct rb_node *parent = rb_parent(victim);
501 | 
502 | 	/* Set the surrounding nodes to point to the replacement */
503 | 	__rb_change_child(victim, new, parent, root);
504 | 	if (victim->rb_left)
505 | 		rb_set_parent(victim->rb_left, new);
506 | 	if (victim->rb_right)
507 | 		rb_set_parent(victim->rb_right, new);
508 | 
509 | 	/* Copy the pointers/colour from the victim to the replacement */
510 | 	*new = *victim;
511 | }
512 | 
513 | static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
514 | {
515 | 	for (;;) {
516 | 		if (node->rb_left)
517 | 			node = node->rb_left;
518 | 		else if (node->rb_right)
519 | 			node = node->rb_right;
520 | 		else
521 | 			return (struct rb_node *)node;
522 | 	}
523 | }
524 | 
525 | struct rb_node *rb_next_postorder(const struct rb_node *node)
526 | {
527 | 	const struct rb_node *parent;
528 | 	if (!node)
529 | 		return NULL;
530 | 	parent = rb_parent(node);
531 | 
532 | 	/* If we're sitting on node, we've already seen our children */
533 | 	if (parent && node == parent->rb_left && parent->rb_right) {
534 | 		/* If we are the parent's left node, go to the parent's right
535 | 		 * node then all the way down to the left */
536 | 		return rb_left_deepest_node(parent->rb_right);
537 | 	} else
538 | 		/* Otherwise we are the parent's right node, and the parent
539 | 		 * should be next */
540 | 		return (struct rb_node *)parent;
541 | }
542 | 
543 | struct rb_node *rb_first_postorder(const struct rb_root *root)
544 | {
545 | 	if (!root->rb_node)
546 | 		return NULL;
547 | 
548 | 	return rb_left_deepest_node(root->rb_node);
549 | }
550 | 
--------------------------------------------------------------------------------
/ondisk_format.h:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: MIT
2 | 
3 | #ifndef BTRFS_FUSE_ONDISK_FORMAT_H
4 | #define BTRFS_FUSE_ONDISK_FORMAT_H
5 | 
6 | #include <linux/types.h>
7 | #include "compat.h"
8 | 
9 | /*
10 |  * Supported sectorsize range in theory.
11 |  *
12 |  * This is the minimal IO unit for data.
13 |  *
14 |  * All supported sectorsizes must be a power of 2.
15 |  * The kernel may only support sectorsize == PAGE_SIZE.
16 |  * (Since v5.14 btrfs has experimental support for 4K sectorsize with 64K page
17 |  * size)
18 |  */
19 | #define BTRFS_SECTORSIZE_MIN 4096
20 | #define BTRFS_SECTORSIZE_MAX 65536
21 | 
22 | /* Maximum filename length (without the trailing '\0') */
23 | #define BTRFS_NAME_LEN 255
24 | 
25 | /*
26 |  * Supported nodesize range.
27 |  *
28 |  * This is the minimal IO unit for metadata.
29 |  *
30 |  * All supported nodesizes must be a power of 2.
31 |  * The kernel only supports nodesize >= sectorsize.
32 |  */
33 | #define BTRFS_NODESIZE_MIN 4096
34 | #define BTRFS_NODESIZE_MAX 65536
35 | 
36 | /* Stripe length for stripe-based profiles (RAID0/RAID10/RAID56) */
37 | #define BTRFS_STRIPE_LEN 65536
38 | 
39 | /*
40 |  * The maximum checksum size in bytes, not all checksum algorithms use all
41 |  * available bytes.
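 * E.g. CRC32C only fills the first 4 bytes and XXHASH the first 8, while
 * SHA256 and BLAKE2b use the full 32 bytes.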
42 |  */
43 | #define BTRFS_CSUM_SIZE 32
44 | 
45 | #define BTRFS_FSID_SIZE 16
46 | #define BTRFS_UUID_SIZE 16
47 | 
48 | /* Supported checksum algorithms */
49 | enum btrfs_csum_type {
50 | 	BTRFS_CSUM_TYPE_CRC32	= 0,
51 | 	BTRFS_CSUM_TYPE_XXHASH	= 1,
52 | 	BTRFS_CSUM_TYPE_SHA256	= 2,
53 | 	BTRFS_CSUM_TYPE_BLAKE2	= 3,
54 | };
55 | 
56 | /* Locations of btrfs super blocks; here we only care about the primary one */
57 | #define BTRFS_SUPER_INFO_OFFSET 65536
58 | 
59 | #define BTRFS_SUPER_INFO_SIZE 4096
60 | 
61 | #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
62 | 
63 | /* A subset of needed key types for read-only operations */
64 | #define BTRFS_INODE_ITEM_KEY	1
65 | #define BTRFS_DIR_ITEM_KEY	84
66 | #define BTRFS_DIR_INDEX_KEY	96
67 | #define BTRFS_EXTENT_DATA_KEY	108
68 | #define BTRFS_EXTENT_CSUM_KEY	128
69 | #define BTRFS_ROOT_ITEM_KEY	132
70 | #define BTRFS_DEV_ITEM_KEY	216
71 | #define BTRFS_CHUNK_ITEM_KEY	228
72 | 
73 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL
74 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL
75 | #define BTRFS_CHUNK_TREE_OBJECTID 3ULL
76 | #define BTRFS_FS_TREE_OBJECTID 5ULL
77 | 
78 | /*
79 |  * This is for a special dir inode in the root tree to indicate which root
80 |  * is the default subvolume (stored as a DIR_ITEM).
81 |  */
82 | #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
83 | #define BTRFS_CSUM_TREE_OBJECTID 7ULL
84 | #define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
85 | #define BTRFS_FIRST_FREE_OBJECTID 256ULL
86 | #define BTRFS_LAST_FREE_OBJECTID -256ULL
87 | #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
88 | 
89 | /*
90 |  * Describes a device
91 |  *
92 |  * Key format:
93 |  * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY, <devid>)
94 |  *
95 |  * Tree:
96 |  * Chunk tree, btrfs_super_block::dev_item
97 |  *
98 |  * It provides a binding between (devid, UUID) and FSID, so btrfs can assemble
99 |  * a multi-device fs correctly.
100 |  */
101 | struct btrfs_dev_item {
102 | 	__le64 devid;
103 | 	__le64 total_bytes;
104 | 
105 | 	/* We may want to check this value to ensure the dev item is sane */
106 | 	__le64 bytes_used;
107 | 
108 | 	__le32 __unused1[5];
109 | 
110 | 	__le64 generation;
111 | 
112 | 	/*
113 | 	 * starting byte of this partition on the device,
114 | 	 * to allow for stripe alignment in the future
115 | 	 */
116 | 	__u8 __unused2[14];
117 | 
118 | 	/* btrfs generated uuid for this device */
119 | 	__u8 uuid[BTRFS_UUID_SIZE];
120 | 
121 | 	/* uuid of the FS which owns this device */
122 | 	__u8 fsid[BTRFS_UUID_SIZE];
123 | } __attribute__ ((__packed__));
124 | 
125 | #define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
126 | 
127 | #define BTRFS_LABEL_SIZE 256
128 | 
129 | #define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
130 | #define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
131 | /*
132 |  * We have extra BTRFS_SUPER_FLAG_* flags, but we don't want to support them
133 |  * for now.
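 *
 * For the feature flags below: a fs with unknown COMPAT flags can still be
 * used normally, unknown COMPAT_RO flags still allow read-only use (all we
 * do here), while any unknown INCOMPAT flag must fail the mount.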
134 |  */
135 | 
136 | #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
137 | #define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL
138 | #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL
139 | 
140 | #define BTRFS_FEATURE_COMPAT_RO_SUPP			\
141 | 	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
142 | 	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID |\
143 | 	 BTRFS_FEATURE_COMPAT_RO_VERITY)
144 | 
145 | #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
146 | #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL
147 | 
148 | #define BTRFS_FEATURE_INCOMPAT_SUPP			\
149 | 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
150 | 	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
151 | 	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
152 | 	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
153 | 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
154 | 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\
155 | 	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
156 | 	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
157 | 	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
158 | 	 BTRFS_FEATURE_INCOMPAT_NO_HOLES |		\
159 | 	 BTRFS_FEATURE_INCOMPAT_RAID1C34 |		\
160 | 	 BTRFS_FEATURE_INCOMPAT_ZONED)
161 | /*
162 |  * Describes the essential info of the fs
163 |  *
164 |  * It contains the following types of info:
165 |  * - Low-level info
166 |  *   Like csum_type, sectorsize, nodesize, compatible flags, how many devices
167 |  *   are in the fs.
168 |  *
169 |  * - Tree info
170 |  *   Like where the essential trees are (tree root, chunk tree).
171 |  *
172 |  * - Device info
173 |  *   This is for the block device containing this superblock; it is essential
174 |  *   to assemble the devices of a multi-device btrfs.
175 |  *
176 |  * - System chunk array
177 |  *   Most bytenrs in btrfs are in the btrfs logical address space, thus to
178 |  *   bootstrap we need a subset of the logical address space mapping.
179 |  *   We store all our SYSTEM type chunk mappings in the super block; with
180 |  *   SYSTEM type chunks mapped, we can read the whole chunk tree, then map the
181 |  *   rest of the filesystem.
182 |  *
183 |  * Members unnecessary for read-only operations are skipped.
184 |  */
185 | struct btrfs_super_block {
186 | 	u8 csum[BTRFS_CSUM_SIZE];
187 | 	u8 fsid[BTRFS_FSID_SIZE];
188 | 	__le64 bytenr;
189 | 	__le64 flags;
190 | 	__le64 magic;
191 | 	__le64 generation;
192 | 	__le64 root;
193 | 	__le64 chunk_root;
194 | 
195 | 	/*
196 | 	 * We may still want to check the log tree, so that one day we can
197 | 	 * provide the latest file content from the log tree.
198 | 	 */
199 | 	__le64 log_root;
200 | 	__le64 log_root_transid;
201 | 	__le64 total_bytes;
202 | 	__le64 bytes_used;
203 | 	__le64 root_dir_objectid;
204 | 	__le64 num_devices;
205 | 	__le32 sectorsize;
206 | 	__le32 nodesize;
207 | 	__le32 __unused1[2];
208 | 	__le32 sys_chunk_array_size;
209 | 	__le64 chunk_root_generation;
210 | 	__le64 compat_flags;
211 | 	__le64 compat_ro_flags;
212 | 	__le64 incompat_flags;
213 | 	__le16 csum_type;
214 | 	u8 root_level;
215 | 	u8 chunk_root_level;
216 | 	u8 log_root_level;
217 | 	struct btrfs_dev_item dev_item;
218 | 
219 | 	char label[BTRFS_LABEL_SIZE];
220 | 
221 | 	__le64 __unused2[2];
222 | 
223 | 	u8 __unused3[BTRFS_FSID_SIZE];
224 | 
225 | 	__le64 __unused4[28];
226 | 	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
227 | 
228 | 	u8 __unused5[1237];
229 | } __attribute__ ((__packed__));
230 | 
231 | static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE, "Superblock size mismatch!");
232 | 
233 | /*
234 |  * Btrfs metadata blocks have two different types:
235 |  * - leaves
236 |  *   Tree blocks at level 0 (the lowest level).
237 |  *   Contain both fixed-size keys and variable length data.
238 |  *
239 |  * - nodes
240 |  *   Tree blocks at level 1~7.
241 |  *   Contain fixed-size keys and the position of each child node/leaf.
242 |  *
243 |  * Both nodes and leaves share the same header.
244 |  */
245 | 
246 | #define BTRFS_MAX_LEVEL 8
247 | 
248 | struct btrfs_header {
249 | 	u8 csum[BTRFS_CSUM_SIZE];
250 | 	u8 fsid[BTRFS_FSID_SIZE];
251 | 
252 | 	/* Logical bytenr of this tree block */
253 | 	__le64 bytenr;
254 | 	__le64 flags;
255 | 
256 | 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
257 | 	__le64 generation;
258 | 	__le64 owner;
259 | 	__le32 nritems;
260 | 	u8 level;
261 | } __attribute__ ((__packed__));
262 | 
263 | 
264 | /*
265 |  * Btrfs uses a fixed key to organize all its metadata.
266 |  * It can be considered as a U136 (64 + 8 + 64) value.
267 |  *
268 |  * Type determines the meaning of objectid and offset.
269 |  * For the full documentation on all meanings of different keys, check:
270 |  * https://btrfs.wiki.kernel.org/index.php/On-disk_Format
271 |  */
272 | struct btrfs_disk_key {
273 | 	__le64 objectid;
274 | 	__u8 type;
275 | 	__le64 offset;
276 | } __attribute__ ((__packed__));
277 | 
278 | /* For most in-memory operations we use btrfs_key, which is in CPU-native endian */
279 | struct btrfs_key {
280 | 	__u64 objectid;
281 | 	__u8 type;
282 | 	__u64 offset;
283 | } __attribute__ ((__packed__));
284 | 
285 | /*
286 |  * A btrfs leaf puts its data like this:
287 |  *
288 |  * [header][item 0][item 1]..[item n][free space][data n]...[data 0]
289 |  *
290 |  * Each item needs the offset/size inside the leaf to locate the corresponding
291 |  * data.
292 |  */
293 | struct btrfs_item {
294 | 	struct btrfs_disk_key key;
295 | 	__le32 offset;
296 | 	__le32 size;
297 | } __attribute__ ((__packed__));
298 | 
299 | struct btrfs_leaf {
300 | 	struct btrfs_header header;
301 | 	struct btrfs_item items[];
302 | } __attribute__ ((__packed__));
303 | 
304 | /*
305 |  * A btrfs node only contains the keys and locations (in btrfs logical address
306 |  * space) of its children.
307 |  *
308 |  * Thus it doesn't need the offset/size pair; it only needs a fixed key_ptr.
309 |  */
310 | 
311 | struct btrfs_key_ptr {
312 | 	struct btrfs_disk_key key;
313 | 	__le64 blockptr;
314 | 	__le64 generation;
315 | } __attribute__ ((__packed__));
316 | 
317 | struct btrfs_node {
318 | 	struct btrfs_header header;
319 | 	struct btrfs_key_ptr ptrs[];
320 | } __attribute__ ((__packed__));
321 | 
322 | 
323 | /*
324 |  * Different types of block groups (and chunks).
325 |  *
326 |  * Btrfs has block_group_item::flags and btrfs_chunk_item::flags
327 |  * sharing these flags.
328 |  *
329 |  * DATA|SYSTEM|METADATA indicates the type of the chunk.
330 |  * DATA chunks contain data, while METADATA chunks contain all tree blocks
331 |  * except chunk tree blocks.
332 |  * SYSTEM chunks contain the tree blocks of the chunk tree only.
333 |  *
334 |  * DATA and METADATA can co-exist for the MIXED_BLOCK_GROUP feature.
335 |  *
336 |  * The remaining bits are the profile of the chunk.
337 |  * If no profile bit is set, the chunk uses the SINGLE profile.
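 *
 * E.g. type == (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_RAID1) is a
 * two-copy data chunk, while type == BTRFS_BLOCK_GROUP_METADATA is a
 * SINGLE profile metadata chunk.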
338 |  */
339 | #define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
340 | #define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
341 | #define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
342 | 
343 | #define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
344 | #define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
345 | #define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
346 | #define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
347 | #define BTRFS_BLOCK_GROUP_RAID5		(1ULL << 7)
348 | #define BTRFS_BLOCK_GROUP_RAID6		(1ULL << 8)
349 | #define BTRFS_BLOCK_GROUP_RAID1C3	(1ULL << 9)
350 | #define BTRFS_BLOCK_GROUP_RAID1C4	(1ULL << 10)
351 | 
352 | #define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |\
353 | 					 BTRFS_BLOCK_GROUP_RAID1 |\
354 | 					 BTRFS_BLOCK_GROUP_DUP |\
355 | 					 BTRFS_BLOCK_GROUP_RAID10 |\
356 | 					 BTRFS_BLOCK_GROUP_RAID5 |\
357 | 					 BTRFS_BLOCK_GROUP_RAID6 |\
358 | 					 BTRFS_BLOCK_GROUP_RAID1C3 |\
359 | 					 BTRFS_BLOCK_GROUP_RAID1C4)
360 | #define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |\
361 | 					 BTRFS_BLOCK_GROUP_RAID6)
362 | 
363 | struct btrfs_stripe {
364 | 	__le64 devid;
365 | 
366 | 	/* The offset is a physical bytenr on the device */
367 | 	__le64 offset;
368 | 	__u8 dev_uuid[BTRFS_UUID_SIZE];
369 | } __attribute__ ((__packed__));
370 | 
371 | /*
372 |  * Describes a chunk, mapping a logical bytenr range to a physical device range
373 |  *
374 |  * Key format:
375 |  * (BTRFS_CHUNK_TREE_OBJECTID, BTRFS_CHUNK_ITEM_KEY, <logical bytenr>)
376 |  *
377 |  * Tree:
378 |  * Chunk tree
379 |  */
380 | struct btrfs_chunk {
381 | 	/* size of this chunk in bytes */
382 | 	__le64 length;
383 | 
384 | 	/* objectid of the root referencing this chunk */
385 | 	__le64 __unused1;
386 | 
387 | 	/*
388 | 	 * The stripe length for stripe-based profiles
389 | 	 * (RAID0/RAID10/RAID5/RAID6).
390 | 	 * Currently it should be fixed to 64K.
391 | 	 */
392 | 	__le64 stripe_len;
393 | 	__le64 type;
394 | 
395 | 	__le32 __unused2[3];
396 | 
397 | 	__le16 num_stripes;
398 | 
399 | 	/* Only used for RAID10, where it's fixed to 2 */
400 | 	__le16 sub_stripes;
401 | 
402 | 	/* One chunk must have at least one stripe */
403 | 	struct btrfs_stripe stripes[];
404 | } __attribute__ ((__packed__));
405 | 
406 | struct btrfs_timespec {
407 | 	__le64 sec;
408 | 	__le32 nsec;
409 | } __attribute__ ((__packed__));
410 | 
411 | /*
412 |  * Describes an inode in btrfs
413 |  *
414 |  * Key format:
415 |  * (<ino>, BTRFS_INODE_ITEM_KEY, 0)
416 |  *
417 |  * Tree:
418 |  * Fs and subvolume trees, root tree (for v1 space cache and default root),
419 |  * log tree.
420 |  */
421 | struct btrfs_inode_item {
422 | 	/* At which generation the inode was created */
423 | 	__le64 generation;
424 | 
425 | 	/* At which generation the inode was last updated */
426 | 	__le64 transid;
427 | 
428 | 	/* Total file size in bytes */
429 | 	__le64 size;
430 | 
431 | 	/* Real space taken in bytes, not taking RAID into consideration */
432 | 	__le64 nbytes;
433 | 
434 | 	__le64 __unused1;
435 | 
436 | 	/*
437 | 	 * How many hard links the inode has
438 | 	 *
439 | 	 * For a directory it should be at most 1.
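 * E.g. a regular file with two hard links has nlink == 2.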
440 |  */
441 | 	__le32 nlink;
442 | 	__le32 uid;
443 | 	__le32 gid;
444 | 
445 | 	/* File type and owner/group/other permission bits */
446 | 	__le32 mode;
447 | 	__le64 __unused2;
448 | 
449 | 	/* Btrfs specific flags like NODATASUM|NODATACOW */
450 | 	__le64 flags;
451 | 
452 | 	__le64 __unused3[5];
453 | 	struct btrfs_timespec atime;
454 | 	struct btrfs_timespec ctime;
455 | 	struct btrfs_timespec mtime;
456 | 	struct btrfs_timespec otime;
457 | } __attribute__ ((__packed__));
458 | 
459 | /*
460 |  * Describes a tree root
461 |  *
462 |  * Key format:
463 |  * (<subvolume id>, BTRFS_ROOT_ITEM_KEY, <offset>)
464 |  *
465 |  * Tree:
466 |  * Root tree
467 |  *
468 |  * For a non-snapshot root, its key::offset will always be 0.
469 |  * For a snapshot root, its key::offset will be the generation when the
470 |  * snapshot was created.
471 |  */
472 | struct btrfs_root_item {
473 | 	struct btrfs_inode_item inode;
474 | 	__le64 generation;
475 | 	__le64 root_dirid;
476 | 	__le64 bytenr;
477 | 	__le64 __unused2[3];
478 | 	__le64 flags;
479 | 	__le32 __unused3;
480 | 	struct btrfs_disk_key __unused4;
481 | 	__u8 __unused5;
482 | 	__u8 level;
483 | 
484 | 	/*
485 | 	 * The following fields appeared after subvol_uuids+subvol_times
486 | 	 * were introduced. They don't make much difference for read-only use,
487 | 	 * but we still need btrfs_root_item to match its on-disk size, or it
488 | 	 * would not be stack safe to read a full on-disk root item into
489 | 	 * such a structure.
490 | 	 */
491 | 	__u8 __unused6[200];
492 | } __attribute__ ((__packed__));
493 | 
494 | #define BTRFS_FT_UNKNOWN	0
495 | #define BTRFS_FT_REG_FILE	1
496 | #define BTRFS_FT_DIR		2
497 | #define BTRFS_FT_CHRDEV		3
498 | #define BTRFS_FT_BLKDEV		4
499 | #define BTRFS_FT_FIFO		5
500 | #define BTRFS_FT_SOCK		6
501 | #define BTRFS_FT_SYMLINK	7
502 | #define BTRFS_FT_XATTR		8
503 | 
504 | /*
505 |  * Extra info to bind a child inode to its parent inode
506 |  *
507 |  * Key format:
508 |  * (<parent ino>, BTRFS_DIR_(ITEM|INDEX)_KEY, <name hash or index>)
509 |  *
510 |  * Tree:
511 |  * Fs and subvolume trees, root tree (for the default subvolume), log tree
512 |  *
513 |  * Both BTRFS_DIR_ITEM and BTRFS_DIR_INDEX share the same btrfs_dir_item,
514 |  * just for different purposes.
515 |  *
516 |  * BTRFS_DIR_ITEM stores the hash of the filename in its key::offset, while
517 |  * BTRFS_DIR_INDEX stores the index number of the inode.
518 |  *
519 |  * This also means BTRFS_DIR_ITEM can have hash conflicts, with several
520 |  * different btrfs_dir_items stored in sequence.
521 |  */
522 | struct btrfs_dir_item {
523 | 	/*
524 | 	 * Where to find the child inode
525 | 	 *
526 | 	 * It can be either:
527 | 	 *
528 | 	 * - (<ino>, BTRFS_INODE_ITEM_KEY, 0)
529 | 	 *   Pointing to the inode item inside the same subvolume
530 | 	 *
531 | 	 * - (<subvolume id>, BTRFS_ROOT_ITEM_KEY, -1)
532 | 	 *   Pointing to another subvolume
533 | 	 */
534 | 	struct btrfs_disk_key location;
535 | 	__le64 transid;
536 | 
537 | 	/*
538 | 	 * For BTRFS_DIR_ITEM/BTRFS_DIR_INDEX, data_len is always 0.
539 | 	 * Only BTRFS_XATTR_ITEM uses this value.
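 *
 * The name (and the xattr value, if any) follows this structure directly
 * inside the item.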
540 |  */
541 | 	__le16 data_len;
542 | 
543 | 	/* The length of the dir/file name, no trailing '\0' */
544 | 	__le16 name_len;
545 | 
546 | 	/* Indicates the type of the child inode, using the BTRFS_FT_* numbers above */
547 | 	__u8 type;
548 | } __attribute__ ((__packed__));
549 | 
550 | enum {
551 | 	BTRFS_FILE_EXTENT_INLINE = 0,
552 | 	BTRFS_FILE_EXTENT_REG = 1,
553 | 	BTRFS_FILE_EXTENT_PREALLOC = 2,
554 | 	BTRFS_NR_FILE_EXTENT_TYPES = 3,
555 | };
556 | 
557 | enum {
558 | 	BTRFS_COMPRESS_NONE = 0,
559 | 	BTRFS_COMPRESS_ZLIB = 1,
560 | 	BTRFS_COMPRESS_LZO = 2,
561 | 	BTRFS_COMPRESS_ZSTD = 3,
562 | 	BTRFS_COMPRESS_LAST = 4,
563 | };
564 | 
565 | /*
566 |  * Describes a file extent
567 |  *
568 |  * Key format:
569 |  * (<ino>, BTRFS_EXTENT_DATA_KEY, <file offset>)
570 |  */
571 | struct btrfs_file_extent_item {
572 | 	/* At which transaction the file extent was created */
573 | 	__le64 generation;
574 | 
575 | 	/* Uncompressed size of the whole file extent */
576 | 	__le64 ram_bytes;
577 | 
578 | 	/* The compression algorithm */
579 | 	__u8 compression;
580 | 	__u8 __unused1[3];
581 | 
582 | 	/* Whether the file extent is INLINE or REGular or PREALLOCated */
583 | 	__u8 type;
584 | 
585 | 	/*
586 | 	 * Logical bytenr where the data is.
587 | 	 *
588 | 	 * At this offset in the structure, the inline extent data starts.
589 | 	 *
590 | 	 * For a REGULAR file extent, if this is 0, this file extent is
591 | 	 * a hole; all its content is 0 and it takes no space on disk.
592 | 	 */
593 | 	__le64 disk_bytenr;
594 | 
595 | 	/*
596 | 	 * The size it takes in btrfs logical address space
597 | 	 * (aka the compressed size for a compressed extent).
598 | 	 */
599 | 	__le64 disk_num_bytes;
600 | 
601 | 	/*
602 | 	 * Offset inside the uncompressed data we read from.
603 | 	 *
604 | 	 * In btrfs we can refer to only part of the whole file extent.
605 | 	 */
606 | 	__le64 offset;
607 | 
608 | 	/* How many bytes of the uncompressed extent we're really referring to */
609 | 	__le64 num_bytes;
610 | 
611 | } __attribute__ ((__packed__));
612 | 
613 | /* Max uncompressed size for a compressed extent */
614 | #define BTRFS_MAX_COMPRESSED (128 * 1024)
615 | 
616 | /*
617 |  * Describes data checksums.
618 |  *
619 |  * Key format:
620 |  * (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY, <logical bytenr>)
621 |  *
622 |  * Csum items can be merged to save space, and the data length one csum
623 |  * item covers can be calculated from its item size:
624 |  * (item_size / csum_size * sectorsize).
625 |  *
626 |  * The csum covers data extents as they lie in btrfs logical address space;
627 |  * this also means, for a compressed file extent, it's the csum of the
628 |  * compressed data, not the uncompressed data.
629 |  */
630 | struct btrfs_csum_item {
631 | 	__u8 csum;
632 | } __attribute__ ((__packed__));
633 | 
634 | #endif
635 | 
--------------------------------------------------------------------------------
/accessors.h:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: MIT
2 | 
3 | #ifndef BTRFS_FUSE_ACCESSORS_H
4 | #define BTRFS_FUSE_ACCESSORS_H
5 | 
6 | #include <stddef.h>
7 | #include <string.h>
8 | #include "ondisk_format.h"
9 | #include "libs/rbtree.h"
10 | 
11 | /*
12 |  * Various helpers to access the on-disk tree blocks
13 |  *
14 |  * We have the extent_buffer structure to represent one tree block.
15 |  * But callers shouldn't just access extent_buffer::data[] directly,
16 |  * as we still need to do endian conversions.
17 |  *
18 |  * To access one named structure, like btrfs_root_item, we need to either:
19 |  *
20 |  * - Get the offset inside a tree block, then use the accessors
21 |  *
22 |  *	struct btrfs_root_item *ri;
23 |  *	u64 generation;
24 |  *
25 |  *	ri = btrfs_item_ptr(extent_buffer, slot, struct btrfs_root_item);
26 |  *	generation = btrfs_disk_root_generation(extent_buffer, ri);
27 |  *
28 |  * - Copy the item into memory, then use the accessors on that memory directly
29 |  *   This is also called the "STACK" way.
30 |  *
31 |  *	u8 buf[sizeof(struct btrfs_root_item)];
32 |  *	u64 generation;
33 |  *
34 |  *	read_extent_buffer(extent_buffer, buf, btrfs_item_ptr_offset(extent_buffer, slot),
35 |  *			   sizeof(struct btrfs_root_item));
36 | 
37 |  * Even though in this project we don't need the complex page operations of the
38 |  * kernel, the accessors interface is intentionally kept the same as the kernel's,
39 |  * to allow btrfs developers to quickly switch between this and the Linux kernel.
40 |  */
41 | 
42 | struct extent_buffer {
43 | 	/* Cached result of btrfs_header::bytenr */
44 | 	u64 start;
45 | 
46 | 	/* The same value as the nodesize of the fs */
47 | 	u32 len;
48 | 
49 | 	/* Extra runtime flags */
50 | 	u32 flags;
51 | 	struct btrfs_fs_info *fs_info;
52 | 	int refs;
53 | 	struct rb_node node;
54 | 	char data[];
55 | };
56 | 
57 | static inline void read_extent_buffer(const struct extent_buffer *eb,
58 | 				      void *dst, unsigned start, unsigned len)
59 | {
60 | 	memcpy(dst, eb->data + start, len);
61 | }
62 | 
63 | static inline int memcmp_extent_buffer(const struct extent_buffer *eb,
64 | 				       const void *src, unsigned start,
65 | 				       unsigned len)
66 | {
67 | 	return memcmp(eb->data + start, src, len);
68 | }
69 | 
70 | #define BTRFS_GET_HEADER_FUNCS(name, type, member, bits)		\
71 | static inline u##bits btrfs_##name(const struct extent_buffer *eb)	\
72 | {									\
73 | 	const struct btrfs_header *h = (struct btrfs_header *)eb->data;	\
74 | 	return le##bits##_to_cpu(h->member);				\
75 | }
76 | 
77 | #define BTRFS_GET_FUNCS(name, type, member, bits)			\
78 | static inline u##bits btrfs_##name(const struct extent_buffer *eb,	\
79 | 				   const type *s)			\
80 | {									\
81 | 	unsigned long offset = (unsigned long)s;			\
82 | 	const type *p = (type *) (eb->data + offset);			\
83 | 	return get_unaligned_le##bits(&p->member);			\
84 | }
85 | 
86 | #define BTRFS_GET_STACK_FUNCS(name, type, member, bits)			\
87 | static inline u##bits btrfs_##name(const type *s)			\
88 | {									\
89 | 	return le##bits##_to_cpu(s->member);				\
90 | }
91 | 
92 | #define read_eb_member(eb, ptr, type, member, result) (		\
93 | 	read_extent_buffer(eb, (char *)(result),			\
94 | 			   ((unsigned long)(ptr)) +			\
95 | 			   offsetof(type, member),			\
96 | 			   sizeof(((type *)0)->member)))
97 | 
98 | /* struct btrfs_dev_item */
99 | BTRFS_GET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
100 | BTRFS_GET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
101 | BTRFS_GET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
102 | BTRFS_GET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);
103 | 
104 | BTRFS_GET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
105 | 		      total_bytes, 64);
106 | BTRFS_GET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
107 | 		      bytes_used, 64);
108 | BTRFS_GET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
109 | BTRFS_GET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
110 | 		      generation, 64);
111 | 
112 | static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
113 | {
114 | 	return (char *)d + offsetof(struct btrfs_dev_item, uuid);
115 | }
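/*
 * For reference, a sketch (not the actual generated source) of what the
 * BTRFS_GET_FUNCS() invocations above expand to, e.g. for
 * BTRFS_GET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64):
 *
 *	static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
 *						   const struct btrfs_dev_item *s)
 *	{
 *		const struct btrfs_dev_item *p =
 *			(struct btrfs_dev_item *)(eb->data + (unsigned long)s);
 *		return get_unaligned_le64(&p->total_bytes);
 *	}
 *
 * Note that @s is an offset inside the tree block (as returned by
 * btrfs_item_ptr()), not a dereferenceable pointer.
 */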
116 | 117 | static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) 118 | { 119 | return (char *)d + offsetof(struct btrfs_dev_item, fsid); 120 | } 121 | 122 | /* struct btrfs_chunk */ 123 | BTRFS_GET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); 124 | BTRFS_GET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); 125 | BTRFS_GET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); 126 | BTRFS_GET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); 127 | BTRFS_GET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); 128 | BTRFS_GET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); 129 | BTRFS_GET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); 130 | 131 | static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) 132 | { 133 | return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); 134 | } 135 | 136 | BTRFS_GET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); 137 | BTRFS_GET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, 138 | stripe_len, 64); 139 | BTRFS_GET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); 140 | BTRFS_GET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, 141 | num_stripes, 16); 142 | BTRFS_GET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, 143 | sub_stripes, 16); 144 | BTRFS_GET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); 145 | BTRFS_GET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); 146 | 147 | static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, 148 | int nr) 149 | { 150 | unsigned long offset = (unsigned long)c; 151 | offset += offsetof(struct btrfs_chunk, stripes); 152 | offset += nr * sizeof(struct btrfs_stripe); 153 | return (struct btrfs_stripe *)offset; 154 | } 155 | 156 | static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) 157 | { 158 | return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); 159 | } 160 | 161 | static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, 162 | struct btrfs_chunk *c, int nr) 163 | { 164 | return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); 165 | } 166 | 167 | static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, 168 | struct btrfs_chunk *c, int nr) 169 | { 170 | return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); 171 | } 172 | 173 | /* struct btrfs_inode_item */ 174 | BTRFS_GET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); 175 | BTRFS_GET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); 176 | BTRFS_GET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); 177 | BTRFS_GET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); 178 | BTRFS_GET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); 179 | BTRFS_GET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); 180 | BTRFS_GET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); 181 | BTRFS_GET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); 182 | BTRFS_GET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); 183 | 184 | BTRFS_GET_STACK_FUNCS(stack_inode_generation, 185 | struct btrfs_inode_item, generation, 64); 186 | BTRFS_GET_STACK_FUNCS(stack_inode_transid, 187 | struct btrfs_inode_item, transid, 64); 188 | BTRFS_GET_STACK_FUNCS(stack_inode_size, 189 | struct btrfs_inode_item, size, 64); 190 | BTRFS_GET_STACK_FUNCS(stack_inode_nbytes, 191 | struct btrfs_inode_item, nbytes, 64); 192 | BTRFS_GET_STACK_FUNCS(stack_inode_nlink, 193 | struct btrfs_inode_item, nlink, 32); 194 | BTRFS_GET_STACK_FUNCS(stack_inode_uid, 195 
| struct btrfs_inode_item, uid, 32); 196 | BTRFS_GET_STACK_FUNCS(stack_inode_gid, 197 | struct btrfs_inode_item, gid, 32); 198 | BTRFS_GET_STACK_FUNCS(stack_inode_mode, 199 | struct btrfs_inode_item, mode, 32); 200 | BTRFS_GET_STACK_FUNCS(stack_inode_flags, 201 | struct btrfs_inode_item, flags, 64); 202 | 203 | static inline struct btrfs_timespec * 204 | btrfs_inode_atime(struct btrfs_inode_item *inode_item) 205 | { 206 | unsigned long ptr = (unsigned long)inode_item; 207 | ptr += offsetof(struct btrfs_inode_item, atime); 208 | return (struct btrfs_timespec *)ptr; 209 | } 210 | 211 | static inline struct btrfs_timespec * 212 | btrfs_inode_mtime(struct btrfs_inode_item *inode_item) 213 | { 214 | unsigned long ptr = (unsigned long)inode_item; 215 | ptr += offsetof(struct btrfs_inode_item, mtime); 216 | return (struct btrfs_timespec *)ptr; 217 | } 218 | 219 | static inline struct btrfs_timespec * 220 | btrfs_inode_ctime(struct btrfs_inode_item *inode_item) 221 | { 222 | unsigned long ptr = (unsigned long)inode_item; 223 | ptr += offsetof(struct btrfs_inode_item, ctime); 224 | return (struct btrfs_timespec *)ptr; 225 | } 226 | 227 | static inline struct btrfs_timespec * 228 | btrfs_inode_otime(struct btrfs_inode_item *inode_item) 229 | { 230 | unsigned long ptr = (unsigned long)inode_item; 231 | ptr += offsetof(struct btrfs_inode_item, otime); 232 | return (struct btrfs_timespec *)ptr; 233 | } 234 | 235 | BTRFS_GET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); 236 | BTRFS_GET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); 237 | BTRFS_GET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); 238 | BTRFS_GET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); 239 | 240 | /* struct btrfs_node */ 241 | BTRFS_GET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); 242 | BTRFS_GET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); 243 | 244 | static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) 245 | { 246 | unsigned long ptr; 247 | ptr = offsetof(struct btrfs_node, ptrs) + 248 | sizeof(struct btrfs_key_ptr) * nr; 249 | return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); 250 | } 251 | 252 | static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) 253 | { 254 | unsigned long ptr; 255 | ptr = offsetof(struct btrfs_node, ptrs) + 256 | sizeof(struct btrfs_key_ptr) * nr; 257 | return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); 258 | } 259 | static inline unsigned long btrfs_node_key_ptr_offset(int nr) 260 | { 261 | return offsetof(struct btrfs_node, ptrs) + 262 | sizeof(struct btrfs_key_ptr) * nr; 263 | } 264 | 265 | static inline void btrfs_node_key(struct extent_buffer *eb, 266 | struct btrfs_disk_key *disk_key, int nr) 267 | { 268 | unsigned long ptr; 269 | ptr = btrfs_node_key_ptr_offset(nr); 270 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, 271 | struct btrfs_key_ptr, key, disk_key); 272 | } 273 | 274 | /* struct btrfs_item */ 275 | BTRFS_GET_FUNCS(item_offset, struct btrfs_item, offset, 32); 276 | BTRFS_GET_FUNCS(item_size, struct btrfs_item, size, 32); 277 | 278 | static inline unsigned long btrfs_item_nr_offset(int nr) 279 | { 280 | return offsetof(struct btrfs_leaf, items) + 281 | sizeof(struct btrfs_item) * nr; 282 | } 283 | 284 | static inline struct btrfs_item *btrfs_item_nr(int nr) 285 | { 286 | return (struct btrfs_item *)btrfs_item_nr_offset(nr); 287 | } 288 | 289 | static inline u32 btrfs_item_end(struct extent_buffer *eb, 290 | struct btrfs_item *item) 291 | { 292 | 
return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); 293 | } 294 | 295 | static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) 296 | { 297 | return btrfs_item_end(eb, btrfs_item_nr(nr)); 298 | } 299 | 300 | static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr) 301 | { 302 | return btrfs_item_offset(eb, btrfs_item_nr(nr)); 303 | } 304 | 305 | static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) 306 | { 307 | return btrfs_item_size(eb, btrfs_item_nr(nr)); 308 | } 309 | 310 | static inline void btrfs_item_key(struct extent_buffer *eb, 311 | struct btrfs_disk_key *disk_key, int nr) 312 | { 313 | struct btrfs_item *item = btrfs_item_nr(nr); 314 | read_eb_member(eb, item, struct btrfs_item, key, disk_key); 315 | } 316 | 317 | #define btrfs_item_ptr(leaf, slot, type) \ 318 | ((type *)(btrfs_leaf_data(leaf) + btrfs_item_offset_nr(leaf, slot))) 319 | 320 | #define btrfs_item_ptr_offset(leaf, slot) \ 321 | ((u32)(btrfs_leaf_data(leaf) + btrfs_item_offset_nr(leaf, slot))) 322 | 323 | /* struct btrfs_dir_item */ 324 | BTRFS_GET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); 325 | BTRFS_GET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); 326 | BTRFS_GET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); 327 | BTRFS_GET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); 328 | 329 | BTRFS_GET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16); 330 | BTRFS_GET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); 331 | BTRFS_GET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16); 332 | BTRFS_GET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64); 333 | 334 | static inline void btrfs_dir_item_key(struct extent_buffer *eb, 335 | struct btrfs_dir_item *item, 336 | struct btrfs_disk_key *key) 337 | { 338 | read_eb_member(eb, item, struct btrfs_dir_item, location, key); 339 | } 340 | 341 | /* struct btrfs_disk_key */ 342 | BTRFS_GET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, 343 | objectid, 64); 344 | BTRFS_GET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); 345 | BTRFS_GET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); 346 | 347 | static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, 348 | struct btrfs_disk_key *disk) 349 | { 350 | cpu->offset = le64_to_cpu(disk->offset); 351 | cpu->type = disk->type; 352 | cpu->objectid = le64_to_cpu(disk->objectid); 353 | } 354 | 355 | static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, 356 | const struct btrfs_key *cpu) 357 | { 358 | disk->offset = cpu_to_le64(cpu->offset); 359 | disk->type = cpu->type; 360 | disk->objectid = cpu_to_le64(cpu->objectid); 361 | } 362 | 363 | static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, 364 | struct btrfs_key *key, int nr) 365 | { 366 | struct btrfs_disk_key disk_key; 367 | btrfs_node_key(eb, &disk_key, nr); 368 | btrfs_disk_key_to_cpu(key, &disk_key); 369 | } 370 | 371 | static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, 372 | struct btrfs_key *key, int nr) 373 | { 374 | struct btrfs_disk_key disk_key; 375 | btrfs_item_key(eb, &disk_key, nr); 376 | btrfs_disk_key_to_cpu(key, &disk_key); 377 | } 378 | 379 | static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, 380 | struct btrfs_dir_item *item, 381 | struct btrfs_key *key) 382 | { 383 | struct btrfs_disk_key disk_key; 384 | btrfs_dir_item_key(eb, item, &disk_key); 385 | btrfs_disk_key_to_cpu(key, &disk_key); 386 | } 387 
|
388 | /* struct btrfs_header */
389 | BTRFS_GET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
390 | BTRFS_GET_HEADER_FUNCS(header_generation, struct btrfs_header,
391 | 		       generation, 64);
392 | BTRFS_GET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
393 | BTRFS_GET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
394 | BTRFS_GET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
395 | BTRFS_GET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
396 | BTRFS_GET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
397 | BTRFS_GET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems,
398 | 		      32);
399 | BTRFS_GET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
400 | BTRFS_GET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
401 | 		      generation, 64);
402 | 
403 | static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
404 | {
405 | 	return (btrfs_header_flags(eb) & flag) == flag;
406 | }
407 | 
408 | static inline unsigned long btrfs_header_fsid(void)
409 | {
410 | 	return offsetof(struct btrfs_header, fsid);
411 | }
412 | 
413 | static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
414 | {
415 | 	return offsetof(struct btrfs_header, chunk_tree_uuid);
416 | }
417 | 
418 | static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
419 | {
420 | 	unsigned long ptr = offsetof(struct btrfs_header, csum);
421 | 	return (u8 *)ptr;
422 | }
423 | 
424 | static inline int btrfs_is_leaf(struct extent_buffer *eb)
425 | {
426 | 	return (btrfs_header_level(eb) == 0);
427 | }
428 | 
429 | /* struct btrfs_root_item */
430 | BTRFS_GET_FUNCS(disk_root_generation, struct btrfs_root_item,
431 | 		generation, 64);
432 | BTRFS_GET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
433 | BTRFS_GET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
434 | 
435 | BTRFS_GET_STACK_FUNCS(root_generation, struct btrfs_root_item,
436 | 		      generation, 64);
437 | BTRFS_GET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
438 | BTRFS_GET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
439 | BTRFS_GET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
440 | BTRFS_GET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
441 | 
442 | /*
443 |  * struct btrfs_super_block
444 |  *
445 |  * Since the super block is not accessed through an extent_buffer, only
446 |  * stack version accessors are provided.
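 *
 * E.g., with the primary superblock read into a stack buffer (a usage
 * sketch, error handling omitted):
 *
 *	struct btrfs_super_block sb;
 *	u32 sectorsize;
 *
 *	btrfs_read_from_disk(fd, (char *)&sb, BTRFS_SUPER_INFO_OFFSET,
 *			     BTRFS_SUPER_INFO_SIZE);
 *	if (btrfs_super_magic(&sb) != BTRFS_MAGIC)
 *		return -EINVAL;
 *	sectorsize = btrfs_super_sectorsize(&sb);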
447 | */ 448 | BTRFS_GET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); 449 | BTRFS_GET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); 450 | BTRFS_GET_STACK_FUNCS(super_generation, struct btrfs_super_block, 451 | generation, 64); 452 | BTRFS_GET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); 453 | BTRFS_GET_STACK_FUNCS(super_sys_array_size, struct btrfs_super_block, 454 | sys_chunk_array_size, 32); 455 | BTRFS_GET_STACK_FUNCS(super_chunk_root_generation, struct btrfs_super_block, 456 | chunk_root_generation, 64); 457 | BTRFS_GET_STACK_FUNCS(super_root_level, struct btrfs_super_block, root_level, 458 | 8); 459 | BTRFS_GET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, chunk_root, 460 | 64); 461 | BTRFS_GET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, 462 | chunk_root_level, 8); 463 | BTRFS_GET_STACK_FUNCS(super_log_root, struct btrfs_super_block, log_root, 64); 464 | BTRFS_GET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block, 465 | log_root_transid, 64); 466 | BTRFS_GET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, 467 | log_root_level, 8); 468 | BTRFS_GET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 469 | 64); 470 | BTRFS_GET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, bytes_used, 471 | 64); 472 | BTRFS_GET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 473 | 32); 474 | BTRFS_GET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, nodesize, 32); 475 | BTRFS_GET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, num_devices, 476 | 64); 477 | BTRFS_GET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, 478 | compat_flags, 64); 479 | BTRFS_GET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, 480 | compat_ro_flags, 64); 481 | BTRFS_GET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, 482 | incompat_flags, 64); 483 | BTRFS_GET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, csum_type, 16); 484 | BTRFS_GET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); 485 | 486 | static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) 487 | { 488 | return offsetof(struct btrfs_leaf, items); 489 | } 490 | 491 | /* struct btrfs_file_extent_item */ 492 | BTRFS_GET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); 493 | BTRFS_GET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8); 494 | 495 | static inline unsigned long btrfs_file_extent_inline_start(struct 496 | btrfs_file_extent_item *e) 497 | { 498 | unsigned long offset = (unsigned long)e; 499 | offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); 500 | return offset; 501 | } 502 | 503 | static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) 504 | { 505 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; 506 | } 507 | 508 | #define BTRFS_FILE_EXTENT_INLINE_DATA_START \ 509 | (offsetof(struct btrfs_file_extent_item, disk_bytenr)) 510 | 511 | static inline u32 512 | btrfs_file_extent_inline_item_len(const struct extent_buffer *eb, 513 | struct btrfs_item *e) 514 | { 515 | return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; 516 | } 517 | 518 | BTRFS_GET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, 519 | disk_bytenr, 64); 520 | BTRFS_GET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, 521 | generation, 64); 522 | BTRFS_GET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, 523 | disk_num_bytes, 
64); 524 | BTRFS_GET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, 525 | offset, 64); 526 | BTRFS_GET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, 527 | num_bytes, 64); 528 | BTRFS_GET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, 529 | ram_bytes, 64); 530 | BTRFS_GET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, 531 | compression, 8); 532 | BTRFS_GET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item, 533 | compression, 8); 534 | BTRFS_GET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item, 535 | ram_bytes, 64); 536 | BTRFS_GET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item, 537 | num_bytes, 64); 538 | BTRFS_GET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item, 539 | offset, 64); 540 | BTRFS_GET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item, 541 | generation, 64); 542 | BTRFS_GET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item, 543 | disk_bytenr, 64); 544 | 545 | u16 btrfs_super_csum_size(const struct btrfs_super_block *sb); 546 | size_t btrfs_super_num_csums(void); 547 | #endif 548 | --------------------------------------------------------------------------------