# SPDX-License-Identifier: BSD-3-Clause
#
# Makefile for the zus user-mode application
#
# Copyright (C) 2018 NetApp, Inc. All rights reserved.
#
# See module.c for LICENSE details.
#
# Authors:
#	Omer Caspi
#
# Two modes, selected by M:
#   M empty  - build the ZUS core (library + daemon) and the FS modules
#              listed in CONFIG_LIBFS_MODULES (from the optional .config).
#   M=<dir>  - build a single FS module; everything is delegated to
#              zusmodule.mk.
ifeq ($(M),)
-include .config

# Append, do not clobber: flags already placed in MAKEFLAGS must survive.
MAKEFLAGS += --no-print-directory

LIBFS_DIRS := $(addprefix fs/,$(CONFIG_LIBFS_MODULES))
LIBFS_CLEAN := $(addprefix clean_,$(CONFIG_LIBFS_MODULES))

# The library must exist before the daemon is linked, hence the fixed order.
core:
	@echo "Building ZUS library"
	@$(MAKE) -f zuslib.mk
	@echo "Building ZUS daemon"
	@$(MAKE) -f zusd.mk

clean: $(LIBFS_CLEAN)
	@echo "Cleaning ZUS core"
	@$(MAKE) -f zuslib.mk __clean
	@$(MAKE) -f zusd.mk __clean

# One target per configured FS module, re-entering this Makefile with M set.
$(CONFIG_LIBFS_MODULES): core
	@echo "Building $@ ZUS FS module"
	@$(MAKE) M=fs/$@ -C $(CURDIR) module

# clean_<name> cleans fs/<name>. $* is the pattern stem (<name>).
# Kept as a pattern rule (and therefore NOT listed in .PHONY, which would
# disable implicit-rule search) so any module under fs/ can be cleaned.
clean_%:
	@echo "Cleaning $* ZUS FS module"
	@$(MAKE) M=fs/$* -C $(CURDIR) module_clean

all: core $(CONFIG_LIBFS_MODULES)

install:
	pkg/install.sh

rpm deb:
	pkg/create_pkg.sh $@

# Index regular sources plus the targets of symlinked sources.
# '*.[ch]' - inside a bracket expression '|' is a literal character,
# not an alternation, so it must not appear there.
cscope:
	find . -type f -name '*.[ch]' > cscope.files
	find . -type l -name '*.[ch]' -exec realpath \
		--relative-to=$(CURDIR) '{}' \; >> cscope.files
	cscope -bcqR

# "core" in particular must be phony: a stray core-dump file named "core"
# would otherwise make the target look up to date and skip the build.
.PHONY: core $(CONFIG_LIBFS_MODULES) install rpm deb all clean cscope
.NOTPARALLEL:
.DEFAULT_GOAL := all
else
include zusmodule.mk
endif
6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Boaz Harrosh 11 | */ 12 | #ifndef ___PR_H__ 13 | #define ___PR_H__ 14 | 15 | #include 16 | #include 17 | 18 | #include "printz.h" 19 | 20 | /* FIXME*/ 21 | #define ERROR(fmt, a...) \ 22 | fprintf(stderr, LOG_STR(LOG_ERR) \ 23 | "zus: [%s:%d]: " fmt, __func__, __LINE__, ##a) 24 | #define INFO(fmt, a...) \ 25 | fprintf(stderr, LOG_STR(LOG_INFO) "zus: ~info~ " fmt, ##a) 26 | 27 | extern ulong g_DBGMASK; 28 | #define ZUS_DBGPRNT (g_DBGMASK & 1) 29 | 30 | #define DBG(fmt, a...) \ 31 | do { if (unlikely(ZUS_DBGPRNT)) \ 32 | fprintf(stderr, LOG_STR(LOG_INFO) \ 33 | "zus: [%s:%d]: " fmt, __func__, __LINE__, ##a); \ 34 | } while (0) 35 | 36 | #define DBGCONT(fmt, a...) \ 37 | do { if (unlikely(ZUS_DBGPRNT)) fprintf(stderr, fmt, ##a); } while (0) 38 | 39 | #define md_dbg_err DBG 40 | #define md_warn_cnd(silent, s, args ...) \ 41 | do {if (!silent) \ 42 | fprintf(stderr, LOG_STR(LOG_WARNING) \ 43 | "md-zus: [%s:%d] " s, __func__, __LINE__, ## args); \ 44 | } while (0) 45 | 46 | #endif /* define ___PR_H__ */ 47 | -------------------------------------------------------------------------------- /a_list.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * Simple double-linked list 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 
8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | * Sagi Manole 12 | */ 13 | #ifndef _LINUX_ZUFS_A_LIST_H 14 | #define _LINUX_ZUFS_A_LIST_H 15 | 16 | #ifndef _GNU_SOURCE 17 | #define _GNU_SOURCE 18 | #endif 19 | 20 | #include 21 | //#include 22 | #include 23 | #include 24 | //#include 25 | //#include 26 | //#include 27 | 28 | struct a_list_head { 29 | struct a_list_head *prev; 30 | struct a_list_head *next; 31 | }; 32 | 33 | static inline void a_list_init(struct a_list_head *list) 34 | { 35 | list->next = list; 36 | list->prev = list; 37 | } 38 | 39 | static inline void _link_list(struct a_list_head *list, 40 | struct a_list_head *prev, 41 | struct a_list_head *next) 42 | { 43 | next->prev = list; 44 | list->next = next; 45 | list->prev = prev; 46 | prev->next = list; 47 | } 48 | 49 | static inline void a_list_add(struct a_list_head *list, 50 | struct a_list_head *head) 51 | { 52 | _link_list(list, head, head->next); 53 | } 54 | 55 | static inline void a_list_add_tail(struct a_list_head *list, 56 | struct a_list_head *head) 57 | { 58 | _link_list(list, head->prev, head); 59 | } 60 | 61 | static inline void a_list_del(struct a_list_head *list) 62 | { 63 | list->next->prev = list->prev; 64 | list->prev->next = list->next; 65 | } 66 | 67 | static inline void a_list_del_init(struct a_list_head *list) 68 | { 69 | a_list_del(list); 70 | a_list_init(list); 71 | } 72 | 73 | static inline int a_list_empty(const struct a_list_head *head) 74 | { 75 | return (head->next == head); 76 | } 77 | 78 | #ifndef container_of 79 | #define container_of(ptr, type, member) ({ \ 80 | (type *)((void *)ptr - offsetof(type, member)); }) 81 | #endif /* container_of */ 82 | 83 | #define a_list_first_entry(ptr, type, member) \ 84 | container_of((ptr)->next, type, member) 85 | 86 | #define a_list_next_entry(pos, member) \ 87 | container_of((pos)->member.next, typeof(*(pos)), member) 88 | 89 | #define a_list_for_each_entry(pos, head, member) \ 90 | for (pos = a_list_first_entry(head, typeof(*pos), 
member); \ 91 | &pos->member != (head); \ 92 | pos = a_list_next_entry(pos, member)) 93 | 94 | #endif /* _LINUX_ZUFS_A_LIST_H */ 95 | -------------------------------------------------------------------------------- /b-minmax.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 4 | * 5 | * See module.c for LICENSE details. 6 | * 7 | * min()/max() macros that also do 8 | * strict type-checking.. See the 9 | * "unnecessary" pointer comparison. 10 | */ 11 | #define ___PASTE(a, b) a##b 12 | #define __PASTE(a, b) ___PASTE(a, b) 13 | 14 | #ifndef __UNIQUE_ID 15 | # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) 16 | #endif 17 | 18 | #define __min(t1, t2, min1, min2, x, y) ({ \ 19 | t1 min1 = (x); \ 20 | t2 min2 = (y); \ 21 | (void) (&min1 == &min2); \ 22 | min1 < min2 ? min1 : min2; }) 23 | #define min(x, y) \ 24 | __min(typeof(x), typeof(y), \ 25 | __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ 26 | x, y) 27 | 28 | #define __max(t1, t2, max1, max2, x, y) ({ \ 29 | t1 max1 = (x); \ 30 | t2 max2 = (y); \ 31 | (void) (&max1 == &max2); \ 32 | max1 > max2 ? 
# Common build rules shared by the ZUS library, the daemon and FS modules.
#
# Inputs (set by the including makefile):
#   PROJ_NAME         mandatory, name of the binary / library
#   PROJ_OBJS         object files, relative to the objs/ directory
#   PROJ_LANG         C (default) or CPP
#   PROJ_TARGET_TYPE  exec (default) or lib (builds lib$(PROJ_NAME).so)
#   PROJ_CDEFS PROJ_INCLUDES PROJ_CFLAGS PROJ_WARNS PROJ_LIBS PROJ_LIB_DIRS
#   PROJ_LDFLAGS PROJ_OBJS_DEPS TARGET_DEPS PROJ_CLEAN_DEPS   optional extras
#   M                 when set, the project lives in $(M) (FS module build)
# Global knobs come from the optional $(ZDIR).config (CONFIG_* variables).

ZDIR := $(dir $(lastword $(MAKEFILE_LIST)))
CONFIG := $(ZDIR).config
ifeq ($(M),)
PROJ_DIR := $(ZDIR)
BUILD_STR := Z
else
PROJ_DIR := $(M)/
BUILD_STR := M
endif

OBJS_DIR := $(PROJ_DIR)objs
OBJS := $(addprefix $(OBJS_DIR)/, $(strip $(PROJ_OBJS)))
OBJS_DEPS := $(OBJS:.o=.d)

ifeq ($(PROJ_NAME),)
$(error no name for project)
endif

ifeq ($(PROJ_LANG),)
PROJ_LANG=C
endif

ifeq ($(filter $(PROJ_LANG),C CPP),)
$(error Unknown ZUS project languege $(PROJ_LANG))
endif

-include $(CONFIG)

# What to warn about (each word becomes -W<word>; "error" gives -Werror)
CWARNS := error all write-strings undef cast-qual missing-declarations
CWARNS += cast-align extra unused shadow float-equal comment sign-compare
CWARNS += address redundant-decls missing-include-dirs unknown-pragmas
CWARNS += parentheses sequence-point unused-macros endif-labels
CWARNS += overlength-strings unreachable-code missing-field-initializers
CWARNS += aggregate-return init-self switch-default switch switch-enum
CWARNS += frame-larger-than=4096 larger-than=4096

ifeq ($(PROJ_LANG), C)
CWARNS += missing-prototypes nested-externs bad-function-cast
CWARNS += old-style-definition strict-prototypes declaration-after-statement
endif

CWARNS += $(PROJ_WARNS)
# Turn off some warnings
CWARNS += no-unused-parameter no-missing-field-initializers
# Turn off clang-specific warnings we don't care about
ifeq ($(CC),clang)
CWARNS += no-gnu-variable-sized-type-not-at-end
CWARNS += no-address-of-packed-member
CWARNS += no-cast-align
CWARNS += no-unused-function
endif

ifeq ($(CONFIG_PEDANTIC),1)
CWARNS += format=2 sign-conversion conversion
CWARNS += strict-prototypes old-style-definition
CWARNS += pointer-arith
endif
CWARNS := -W $(addprefix -W,$(CWARNS))

CDEFS += $(CONFIG_GLOBAL_CDEFS) $(PROJ_CDEFS)
CDEFS := $(addprefix -D,$(CDEFS))

INCLUDES := $(realpath $(CONFIG_GLOBAL_INCLUDES) $(PROJ_INCLUDES))
INCLUDES := $(addprefix -I,$(INCLUDES))

# -MMD writes a .d file next to every object; -MP adds an empty phony
# target per header, so deleting or renaming a header does not stop the
# build with "No rule to make target".
CFLAGS := -MMD -MP $(CONFIG_GLOBAL_CFLAGS) $(PROJ_CFLAGS)
ifeq ($(DEBUG), 1)
CFLAGS += -g -ggdb
endif
# gcc optimization: for now debug builds also use -O2, like the Kernel
# (can be overridden with CONFIG_OPTIMIZE_LEVEL = <level>)
ifdef CONFIG_OPTIMIZE_LEVEL
CFLAGS += -O$(CONFIG_OPTIMIZE_LEVEL)
else
CFLAGS += -O2
endif

ifeq ($(PROJ_LANG),CPP)
CC=g++
SRC_SUFFIX := cpp
CFLAGS := -std=gnu++0x $(CFLAGS)
else
SRC_SUFFIX := c
CFLAGS := -std=gnu11 $(CFLAGS)
endif

ifeq ($(CONFIG_PEDANTIC),1)
CFLAGS += -pedantic
endif

CFLAGS += $(CWARNS) $(CDEFS) $(INCLUDES)

LIBS := $(CONFIG_GLOBAL_LIBS) $(PROJ_LIBS)
LIB_DIRS := $(realpath $(CONFIG_GLOBAL_LIB_DIRS) $(PROJ_LIB_DIRS))
LDFLAGS += $(CONFIG_GLOBAL_LDFLAGS) $(PROJ_LDFLAGS) -Wl,--no-undefined
LDFLAGS += $(addprefix -L,$(LIB_DIRS))
LDFLAGS += $(addprefix -l,$(LIBS))

ifeq ($(PROJ_TARGET_TYPE),)
PROJ_TARGET_TYPE := exec
endif

ifeq ($(filter $(PROJ_TARGET_TYPE),exec lib),)
$(error Invalid project type $(PROJ_TARGET_TYPE))
endif

ifeq ($(PROJ_TARGET_TYPE),lib)
TARGET := $(PROJ_DIR)/lib$(PROJ_NAME).so
CFLAGS := -fpic $(CFLAGS)
LDFLAGS := -shared $(LDFLAGS)
else
TARGET := $(PROJ_DIR)/$(PROJ_NAME)
endif

# =============== common rules =================================================
# Never leave a half-written object or binary behind when a recipe fails;
# it would look up to date on the next run.
.DELETE_ON_ERROR:

# Every object is rebuilt when the build machinery itself changes.
PROJ_OBJS_DEPS += $(ZDIR)/Makefile $(ZDIR)/common.zus.mk
ifneq ($(realpath $(CONFIG)),)
PROJ_OBJS_DEPS += $(CONFIG)
endif

# Q silences the real command lines unless CONFIG_BUILD_VERBOSE = 1;
# in quiet mode a short "CC"/"LD" line is echoed instead.
ifneq ($(CONFIG_BUILD_VERBOSE),1)
Q := @
endif

$(OBJS_DIR)/%.o: $(PROJ_DIR)%.$(SRC_SUFFIX) $(PROJ_OBJS_DEPS)
	@mkdir -p $(dir $@)
	$(if $(Q),@echo "CC [$(BUILD_STR)] $(notdir $@)")
	$(Q)$(CC) $(CFLAGS) -c $< -o $@

$(TARGET): $(TARGET_DEPS) $(OBJS)
	$(if $(Q),@echo "LD [$(BUILD_STR)] $(notdir $(TARGET))")
	$(Q)$(CC) $(OBJS) $(LDFLAGS) -o $(TARGET)
	@echo

__clean: $(PROJ_CLEAN_DEPS)
	@rm -f $(OBJS_DEPS) $(TARGET) $(OBJS)

# Compiler-generated header dependencies (absent on the first build).
-include $(OBJS_DEPS)

.DEFAULT_GOAL := $(TARGET)
.PHONY: __clean
8 | # Default: /usr/src/kernel/$(uname -r)/ 9 | ZUF_KERN_DIR=../zuf/ 10 | 11 | # Uncomment in case kernel is built with 'O=.build' 12 | #ZUF_KERN_BUILD_DIR = ../zuf/.build 13 | 14 | # Enable debugging info Master switch 15 | DEBUG=1 16 | 17 | # Enable/disable pedantic flags 18 | CONFIG_PEDANTIC = 0 19 | 20 | # Set optimization level (unset, will default to 2) 21 | CONFIG_OPTIMIZE_LEVEL = 2 22 | 23 | # Try Using Anonymous mmap 24 | CONFIG_TRY_ANON_MMAP = 1 25 | 26 | # Control path zus-zuf 27 | CONFIG_ZUF_DEF_PATH = /sys/fs/zuf 28 | 29 | # Verbose build output 30 | CONFIG_BUILD_VERBOSE = 0 31 | 32 | # List of filesystems to build 33 | CONFIG_LIBFS_MODULES = foofs toyfs 34 | -------------------------------------------------------------------------------- /fs-loader.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * zus-vfs.c - Abstract FS interface that calls into the um-FS 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Yigal Korman 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | 18 | #include "zus.h" 19 | #include "zuf_call.h" 20 | 21 | /* ~~~ called by FS code to add an FS-type ~~~ */ 22 | int zus_register_one(int fd, struct zus_fs_info *zfi) 23 | { 24 | int err; 25 | 26 | err = zuf_register_fs(fd, zfi); 27 | if (err) 28 | return err; 29 | 30 | return 0; 31 | } 32 | 33 | /* ~~~ dynamic loading of FS plugins ~~~ */ 34 | static void *g_dl_list[ZUS_LIBFS_MAX_NR] = {}; 35 | 36 | static int _try_load_from(void **handle, const char namefmt[], ...) 
37 | { 38 | char libfs_path[ZUS_LIBFS_MAX_PATH]; 39 | void *dl_lib; 40 | va_list args; 41 | int err; 42 | 43 | va_start (args, namefmt); 44 | err = vsnprintf(libfs_path, sizeof(libfs_path), namefmt, args); 45 | va_end (args); 46 | 47 | if (err < 0) { 48 | ERROR("Path reconstruction too long [%s]\n", namefmt); 49 | return -EINVAL; 50 | } 51 | 52 | dl_lib = dlopen(libfs_path, RTLD_NOW); 53 | DBG("dlopen(%s) = %p, dlerror=%s\n", libfs_path, dl_lib, dlerror()); 54 | if (!dl_lib) 55 | return -ENOENT; 56 | 57 | *handle = dl_lib; 58 | return 0; 59 | } 60 | 61 | static int _load_one_fs(int fd, const char *fs_name, void **handle) 62 | { 63 | void *dl_lib; 64 | int (*register_fn)(int fd); 65 | char *dl_err; 66 | int err; 67 | 68 | DBG("p=%s\n", fs_name); 69 | /* try to load production path */ 70 | err = _try_load_from(&dl_lib, "%s/lib%s.so", ZUS_LIBFS_DIR, fs_name); 71 | if (!err) 72 | goto found; 73 | if (err != -ENOENT) 74 | return err; 75 | 76 | /* try to load from current dir or LD_LIBRARY_PATH */ 77 | err = _try_load_from(&dl_lib, "lib%s.so", fs_name); 78 | if (!err) 79 | goto found; 80 | if (err != -ENOENT) 81 | return err; 82 | 83 | /* try to load from full path name */ 84 | err = _try_load_from(&dl_lib, "%s", fs_name); 85 | if (!err) 86 | goto found; 87 | if (err) 88 | return err; 89 | 90 | found: 91 | /* clear existing errors (in the case DBG not compiled) */ 92 | dlerror(); 93 | 94 | register_fn = dlsym(dl_lib, REGISTER_FS_NAME); 95 | dl_err = dlerror(); 96 | if (dl_err) { 97 | ERROR("register_fs retrieval failed => %s\n", dl_err); 98 | dlclose(dl_lib); 99 | return -EBADF; 100 | } 101 | 102 | err = register_fn(fd); 103 | if (err) { 104 | ERROR("%s::register_fs failed => %d\n", fs_name, err); 105 | dlclose(dl_lib); 106 | return err; 107 | } 108 | err = zus_add_module_ddbg(fs_name, dl_lib); 109 | if (err) 110 | ERROR("%s:: dynamic debug load failed=> %d\n", fs_name, err); 111 | 112 | *handle = dl_lib; 113 | return 0; 114 | } 115 | 116 | static int _load_libfs(int fd) 
117 | { 118 | const char *libfs_env = getenv(ZUFS_LIBFS_LIST); 119 | char *orig_libfs_str, *libfs_str, *p; 120 | int lib_no = 0; 121 | int err = 0; 122 | 123 | DBG("%s: %s\n", ZUFS_LIBFS_LIST, libfs_env); 124 | if (!libfs_env || !*libfs_env) 125 | return 0; 126 | 127 | libfs_str = orig_libfs_str = strdup(libfs_env); 128 | if (!orig_libfs_str) 129 | return -ENOMEM; 130 | 131 | while ((p = strsep(&libfs_str, ",")) != NULL) { 132 | if (!*p) 133 | continue; 134 | err = _load_one_fs(fd, p, &g_dl_list[lib_no]); 135 | if (unlikely(err)) 136 | break; 137 | ++lib_no; 138 | } 139 | free(orig_libfs_str); 140 | 141 | return err; 142 | } 143 | 144 | static void _unload_libfs(void *handle) 145 | { 146 | int err; 147 | 148 | if (handle) { 149 | err = dlclose(handle); 150 | if (err) 151 | ERROR("dlclose failed => %d\n", err); 152 | } 153 | } 154 | 155 | /* ~~~ called by zus thread ~~~ */ 156 | int zus_register_all(int fd) 157 | { 158 | int err; 159 | 160 | err = _load_libfs(fd); 161 | if (err) { 162 | ERROR("failed to load dynamic libfs modules => %d\n", err); 163 | return err; 164 | } 165 | 166 | return 0; 167 | } 168 | 169 | void zus_unregister_all(void) 170 | { 171 | int i; 172 | 173 | zus_free_ddbg_db(); 174 | for (i = 0; i < ZUS_LIBFS_MAX_NR; ++i) { 175 | if (g_dl_list[i]) 176 | _unload_libfs(g_dl_list[i]); 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /fs/foofs/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Makefile for foofs 4 | # 5 | # Copyright (C) 2018 NetApp, Inc. All rights reserved. 6 | # 7 | # See module.c for LICENSE details. 8 | # 9 | # Authors: 10 | # Omer Caspi 11 | 12 | FOOFS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) 13 | ZDIR?=$(FOOFS_DIR)../.. 
# SPDX-License-Identifier: BSD-3-Clause
#
# Makefile for foofs
#
# Copyright (C) 2018 NetApp, Inc. All rights reserved.
#
# See module.c for LICENSE details.
#
# Authors:
#	Omer Caspi

# Directory of this Makefile, used to locate the zus tree two levels up
# unless the caller already provides ZDIR.
FOOFS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
ZDIR?=$(FOOFS_DIR)../..
ZM_NAME := foofs
ZM_OBJS := foofs.o

# Stand-alone build: re-enter the zus top-level Makefile with M pointing
# here. $(CURDIR) is maintained by make itself; $(PWD) is only the
# inherited shell variable and is stale under "make -C <dir>".
all:
	$(MAKE) M=$(CURDIR) -C $(ZDIR) module
clean:
	$(MAKE) M=$(CURDIR) -C $(ZDIR) module_clean

.PHONY: all clean
(Silently destroy anything there) 36 | */ 37 | enum { 38 | M1FS_MAJOR_VERSION = 17, 39 | M1FS_MINOR_VERSION = 0, 40 | M1FS_SUPER_MAGIC = 0x5346314d /* M1FS in BE */ 41 | }; 42 | 43 | static struct zus_inode *find_zi(struct zus_sb_info *sbi, ulong ino) 44 | { 45 | struct zus_inode *zi_array = md_baddr(&sbi->md, 1); 46 | 47 | return &zi_array[ino]; 48 | } 49 | 50 | static struct zus_inode *find_free_ino(struct zus_sb_info *sbi) 51 | { 52 | struct zus_inode *zi_array = md_baddr(&sbi->md, 1); 53 | ulong max_ino = md_t1_blocks(&sbi->md) / FOOFS_INODES_RATIO * 54 | FOOFS_INO_PER_BLOCK; 55 | ulong i; 56 | 57 | for (i = 1; i < max_ino; ++i) { 58 | if (!zi_array[i].i_mode) { 59 | zi_array[i].i_ino = i; 60 | return &zi_array[i]; 61 | } 62 | } 63 | 64 | return NULL; 65 | } 66 | 67 | static ulong _get_fill(struct zus_sb_info *sbi) 68 | { 69 | struct zus_inode *zi_array = md_baddr(&sbi->md, 1); 70 | ulong max_ino = md_t1_blocks(&sbi->md) / FOOFS_INODES_RATIO * 71 | FOOFS_INO_PER_BLOCK; 72 | ulong used_files = 0; 73 | ulong i; 74 | 75 | for (i = 1; i < max_ino; ++i) { 76 | if (zi_array[i].i_mode) 77 | ++used_files; 78 | } 79 | 80 | return used_files; 81 | } 82 | 83 | enum {MAX_NAME = 16}; 84 | enum {MAX_ENTS = PAGE_SIZE / (MAX_NAME + 8)}; 85 | struct foofs_dir { 86 | struct __foo_dir_ent { 87 | ulong ino; 88 | char name[MAX_NAME]; 89 | } ents[MAX_ENTS]; 90 | }; 91 | 92 | static struct foofs_dir *_foo_dir(struct zus_inode_info *dir_ii) 93 | { 94 | return md_baddr(&dir_ii->sbi->md, dir_ii->zi->i_ino + 1); 95 | } 96 | 97 | static struct __foo_dir_ent *_find_de(struct zus_inode_info *dir_ii, 98 | struct zufs_str *str) 99 | { 100 | struct foofs_dir *dir; 101 | int i; 102 | 103 | dir = _foo_dir(dir_ii); 104 | for (i = 0; i < MAX_ENTS; ++i) 105 | if (0 == strncmp(dir->ents[i].name, str->name, str->len)) 106 | return &dir->ents[i]; 107 | return NULL; /* NOT FOUND */ 108 | } 109 | 110 | static struct __foo_dir_ent *_find_empty_de(struct zus_inode_info *dir_ii) 111 | { 112 | struct 
foofs_dir *dir; 113 | int i; 114 | 115 | dir = _foo_dir(dir_ii); 116 | for (i = 0; i < MAX_ENTS; ++i) 117 | if (!dir->ents[i].ino) 118 | return &dir->ents[i]; 119 | return NULL; /* ENOSPC */ 120 | } 121 | 122 | static void _init_root(struct zus_sb_info *sbi) 123 | { 124 | struct zus_inode *root; 125 | struct timespec now; 126 | void *root_dir; 127 | 128 | root = find_zi(sbi, FOOFS_ROOT_NO); 129 | 130 | memset(root, 0, sizeof(*root)); 131 | root->i_ino = FOOFS_ROOT_NO; 132 | root->i_nlink = 2; 133 | root->i_mode = S_IFDIR | 0644; 134 | root->i_uid = 0; 135 | root->i_gid = 0; 136 | 137 | clock_gettime(CLOCK_REALTIME, &now); 138 | timespec_to_zt(&root->i_atime, &now); 139 | timespec_to_zt(&root->i_mtime, &now); 140 | timespec_to_zt(&root->i_ctime, &now); 141 | 142 | root->i_size = PAGE_SIZE; 143 | root->i_blocks = 1; 144 | root_dir = md_baddr(&sbi->md, FOOFS_ROOT_NO + 1); 145 | memset(root_dir, 0, PAGE_SIZE); 146 | } 147 | 148 | /* ~~~~~~~~~~~~~~~~ Vectors ~~~~~~~~~~~~~~~~~~~~~*/ 149 | static const struct zus_zii_operations foofs_zii_operations; 150 | static const struct zus_sbi_operations foofs_sbi_operations; 151 | static const struct zus_zfi_operations foofs_zfi_operations; 152 | 153 | /* ~~~~ foofs_sbi_operations ~~~~ */ 154 | static 155 | struct zus_sb_info *foofs_sbi_alloc(struct zus_fs_info *zfi) 156 | { 157 | struct zus_sb_info *sbi = calloc(1, sizeof(struct zus_sb_info)); 158 | 159 | if (!sbi) 160 | return NULL; 161 | 162 | sbi->op = &foofs_sbi_operations; 163 | return sbi; 164 | } 165 | 166 | static void foofs_sbi_free(struct zus_sb_info *sbi) 167 | { 168 | free(sbi); 169 | } 170 | 171 | static 172 | int foofs_sbi_init(struct zus_sb_info *sbi, struct zufs_mount_info *zmi) 173 | { 174 | _init_root(sbi); 175 | sbi->z_root = zus_iget(sbi, FOOFS_ROOT_NO); 176 | if (unlikely(!sbi->z_root)) 177 | return -ENOMEM; 178 | 179 | zmi->s_blocksize_bits = PAGE_SHIFT; 180 | return 0; 181 | } 182 | 183 | static int foofs_sbi_fini(struct zus_sb_info *sbi) 184 | { 185 | // 
zus_iput(sbi->z_root); was this done already 186 | return 0; 187 | } 188 | 189 | static 190 | struct zus_inode_info *foofs_zii_alloc(struct zus_sb_info *sbi) 191 | { 192 | struct zus_inode_info *zii = calloc(1, sizeof(struct zus_inode_info)); 193 | 194 | if (!zii) 195 | return NULL; 196 | 197 | zii->op = &foofs_zii_operations; 198 | return zii; 199 | } 200 | 201 | static 202 | void foofs_zii_free(struct zus_inode_info *zii) 203 | { 204 | free(zii); 205 | } 206 | 207 | static int foofs_statfs(struct zus_sb_info *sbi, struct zufs_ioc_statfs *ioc) 208 | { 209 | uint num_files; 210 | 211 | num_files = _get_fill(sbi); 212 | 213 | ioc->statfs_out.f_type = M1FS_SUPER_MAGIC; 214 | ioc->statfs_out.f_bsize = PAGE_SIZE; 215 | 216 | ioc->statfs_out.f_blocks = md_t1_blocks(&sbi->md); 217 | ioc->statfs_out.f_bfree = ioc->statfs_out.f_blocks - num_files; 218 | ioc->statfs_out.f_bavail = ioc->statfs_out.f_bfree; 219 | 220 | ioc->statfs_out.f_files = num_files; 221 | ioc->statfs_out.f_ffree = MAX_ENTS - num_files; 222 | 223 | // ioc->statfs_out.f_fsid.val[0] = 0x17; 224 | // ioc->statfs_out.f_fsid.val[1] = 0x17; 225 | 226 | ioc->statfs_out.f_namelen = MAX_NAME; 227 | 228 | ioc->statfs_out.f_frsize = 0; // ??? 229 | ioc->statfs_out.f_flags = 0; // ???? 
230 | 231 | memset(ioc->statfs_out.f_spare, 0, sizeof(ioc->statfs_out.f_spare)); 232 | return 0; 233 | } 234 | 235 | static struct zus_inode_info * 236 | foofs_new_inode(struct zus_sb_info *sbi, 237 | void *app_ptr, struct zufs_ioc_new_inode *ioc_new) 238 | { 239 | struct zus_inode *zi = find_free_ino(sbi); 240 | ulong ino; 241 | struct zus_inode_info *zii; 242 | 243 | if (unlikely(!zi)) 244 | return NULL; 245 | 246 | zii = foofs_zii_alloc(sbi); 247 | if (!zii) 248 | return NULL; 249 | 250 | zii->zi = zi; 251 | 252 | ino = zi->i_ino; 253 | *zi = ioc_new->zi; 254 | zi->i_ino = ino; 255 | 256 | if (zi_isdir(zi)) { 257 | void *dir = _foo_dir(ioc_new->dir_ii); 258 | 259 | memset(dir, 0, PAGE_SIZE); 260 | zi->i_size = PAGE_SIZE; 261 | zi->i_blocks = 1; 262 | 263 | zus_std_new_dir(ioc_new->dir_ii->zi, zi); 264 | }/* else zi_issym(zi) { 265 | TODO: long symlink in app_ptr 266 | }*/ 267 | 268 | DBG("[%lld] size=0x%llx, blocks=0x%llx ct=0x%llx mt=0x%llx link=0x%x mode=0x%x\n", 269 | zi->i_ino, zi->i_size, zi->i_blocks, zi->i_ctime, zi->i_mtime, 270 | zi->i_nlink, zi->i_mode); 271 | 272 | return zii; 273 | } 274 | 275 | static void foofs_free_inode(struct zus_inode_info *zii) 276 | { 277 | DBG("\n"); 278 | zii->zi->i_mode = 0; 279 | zii->zi->i_ino = 0; 280 | /* Do we need to clean anything */ 281 | 282 | /* TODO: Have ref-count and free on last */ 283 | foofs_zii_free(zii); 284 | } 285 | 286 | static int foofs_iget(struct zus_sb_info *sbi, ulong ino, 287 | struct zus_inode_info **zii) 288 | { 289 | 290 | struct zus_inode *zi = find_zi(sbi, ino); 291 | 292 | if (!zi) 293 | return -ENOENT; 294 | 295 | *zii = foofs_zii_alloc(sbi); 296 | if (unlikely(!*zii)) 297 | return -ENOMEM; 298 | 299 | (*zii)->zi = zi; 300 | return 0; 301 | } 302 | 303 | static ulong foofs_lookup(struct zus_inode_info *dir_ii, struct zufs_str *str) 304 | { 305 | struct __foo_dir_ent *de; 306 | 307 | DBG("[%.*s]\n", str->len, str->name); 308 | 309 | de = _find_de(dir_ii, str); 310 | if (unlikely(!de)) 311 | 
return 0; /* NOT FOUND */ 312 | 313 | return de->ino; 314 | } 315 | 316 | static int foofs_add_dentry(struct zus_inode_info *dir_ii, 317 | struct zus_inode_info *zii, struct zufs_str *str) 318 | { 319 | uint nl = min_t(uint, MAX_NAME-1, str->len); 320 | struct __foo_dir_ent *de; 321 | 322 | de = _find_empty_de(dir_ii); 323 | if (unlikely(!de)) { 324 | DBG("[%ld] [%.*s] => -ENOSPC\n", 325 | zi_ino(dir_ii->zi), str->len, str->name); 326 | return -ENOSPC; 327 | } 328 | 329 | memcpy(de->name, str->name, nl); 330 | de->name[nl] = 0; /* C string for prints */ 331 | de->ino = zii->zi->i_ino; 332 | zus_std_add_dentry(dir_ii->zi, zii->zi); 333 | 334 | DBG("[%ld] [%.*s] ino=%ld\n", 335 | zi_ino(dir_ii->zi), str->len, str->name, de->ino); 336 | return 0; 337 | } 338 | 339 | static int foofs_remove_dentry(struct zus_inode_info *dir_ii, 340 | struct zus_inode_info *zii, 341 | struct zufs_str *str) 342 | { 343 | struct __foo_dir_ent *de; 344 | 345 | DBG("[%ld] [%.*s]\n", zi_ino(dir_ii->zi), str->len, str->name); 346 | 347 | de = _find_de(dir_ii, str); 348 | if (unlikely(!de)) 349 | return -ENOENT; 350 | 351 | zus_std_remove_dentry(dir_ii->zi, zii->zi); 352 | de->ino = 0; 353 | de->name[0] = 0; 354 | 355 | return 0; 356 | } 357 | 358 | static int foofs_readdir(void *app_ptr, struct zufs_ioc_readdir *zir) 359 | { 360 | struct zufs_readdir_iter rdi; 361 | struct foofs_dir *dir; 362 | uint start = zir->pos / sizeof(struct __foo_dir_ent); 363 | uint i; 364 | 365 | zufs_readdir_iter_init(&rdi, zir, app_ptr); 366 | 367 | DBG("[0x%ld] pos 0x%lx\n", zi_ino(zir->dir_ii->zi), zir->pos); 368 | 369 | if (zir->pos == 0) { 370 | zufs_zde_emit(&rdi, zir->dir_ii->zi->i_ino, DT_DIR, 0, ".", 2); 371 | zir->pos = 1; 372 | } 373 | if (zir->pos == 1) { 374 | zufs_zde_emit(&rdi, zir->dir_ii->zi->i_ino, DT_DIR, 1, "..", 3); 375 | zir->pos = 2; 376 | } 377 | 378 | dir = _foo_dir(zir->dir_ii); 379 | for (i = start; i < MAX_ENTS; ++i) { 380 | struct __foo_dir_ent *de = &dir->ents[i]; 381 | bool ok; 382 | 
383 | zir->pos = i * sizeof(*de); 384 | if (!de->ino) 385 | continue; 386 | 387 | ok = zufs_zde_emit(&rdi, de->ino, 1, zir->pos, 388 | de->name, strlen(de->name) + 1); 389 | if (unlikely(!ok)) { 390 | DBG("long dir\n"); 391 | break; 392 | } 393 | DBG(" [%ld] <%s>\n", de->ino, de->name); 394 | } 395 | 396 | return 0; 397 | } 398 | 399 | /* ~~~~ foofs_zii_operations ~~~~ */ 400 | static void foofs_evict(struct zus_inode_info *zii) 401 | { 402 | } 403 | 404 | static int foofs_read(void *ptr, struct zufs_ioc_IO *op) 405 | { 406 | struct zus_inode_info *zii = op->zus_ii; 407 | ulong *app_ptr = ptr; 408 | ulong *app_end = app_ptr + op->hdr.len / sizeof(ulong); 409 | ulong start = op->filepos / sizeof(ulong); 410 | 411 | // INFO("READ start=0x%lx len=0x%lx offset=0x%x\n", 412 | // start, op->hdr.len / sizeof(ulong), op->hdr.offset); 413 | 414 | if (zii->zi->i_on_disk.a[0]) { 415 | *app_ptr = 0xB00DBAAD; 416 | return 0; 417 | } 418 | 419 | while (app_ptr < app_end) 420 | *app_ptr++ = start++; 421 | 422 | return 0; 423 | } 424 | 425 | static int foofs_write(void *ptr, struct zufs_ioc_IO *op) 426 | { 427 | struct zus_inode_info *zii = op->zus_ii; 428 | ulong *app_ptr = ptr; 429 | ulong *app_end = app_ptr + op->hdr.len / sizeof(ulong); 430 | ulong start = op->filepos / sizeof(ulong); 431 | ulong end_pos = op->filepos + op->hdr.len; 432 | 433 | zii->zi->i_on_disk.a[0] = 0; 434 | 435 | for (; app_ptr < app_end; ++app_ptr, ++start) { 436 | if (*app_ptr != start) { 437 | if (g_DBGMASK & 0x10) 438 | ERROR("*app_ptr(0x%lx) != start(0x%lx) offset=0x%x len=0x%x\n", 439 | *app_ptr, start, op->hdr.offset, op->hdr.len); 440 | } 441 | } 442 | 443 | if (zii->zi->i_size < end_pos) 444 | zii->zi->i_size = end_pos; 445 | 446 | return 0; 447 | } 448 | 449 | static int foofs_get_block(struct zus_inode_info *zii, 450 | struct zufs_ioc_IO *get_block) 451 | { 452 | struct zus_iomap_build iomb = {}; 453 | /* foo-fs stands for foo-l */ 454 | ulong bn = zii->zi->i_ino + 1; 455 | 456 | 
_zus_iom_init_4_ioc_io(&iomb, NULL, get_block, ZUS_MAX_OP_SIZE); 457 | _zus_iom_start(&iomb, NULL, NULL); 458 | _ziom_enc_t1_bn(&iomb, bn, 0); 459 | get_block->hdr.out_len = _ioc_IO_size(1); 460 | return 0; 461 | } 462 | 463 | static const struct zus_zii_operations foofs_zii_operations = { 464 | .evict = foofs_evict, 465 | .read = foofs_read, 466 | .write = foofs_write, 467 | .get_block = foofs_get_block, 468 | }; 469 | 470 | static const struct zus_sbi_operations foofs_sbi_operations = { 471 | .new_inode = foofs_new_inode, 472 | .free_inode = foofs_free_inode, 473 | 474 | .lookup = foofs_lookup, 475 | .add_dentry = foofs_add_dentry, 476 | .remove_dentry = foofs_remove_dentry, 477 | .iget = foofs_iget, 478 | 479 | // rename =, 480 | .readdir = foofs_readdir, 481 | // clone =, 482 | .statfs = foofs_statfs, 483 | }; 484 | 485 | static const struct zus_zfi_operations foofs_zfi_operations = { 486 | .sbi_alloc = foofs_sbi_alloc, 487 | .sbi_free = foofs_sbi_free, 488 | .sbi_init = foofs_sbi_init, 489 | .sbi_fini = foofs_sbi_fini, 490 | }; 491 | 492 | /* Is not const because it is hanged on a list_head */ 493 | static struct zus_fs_info foo_zfi = { 494 | .rfi.fsname = "foof", 495 | .rfi.FS_magic = M1FS_SUPER_MAGIC, 496 | .rfi.FS_ver_major = M1FS_MAJOR_VERSION, 497 | .rfi.FS_ver_minor = M1FS_MINOR_VERSION, 498 | .rfi.dt_offset = 0, 499 | 500 | .rfi.s_time_gran = 1, 501 | .rfi.def_mode = /*FOO_DEF_SBI_MODE*/0755, 502 | .rfi.s_maxbytes = MAX_LFS_FILESIZE, 503 | 504 | // .rfi.acl_on = 1, 505 | 506 | .op = &foofs_zfi_operations, 507 | .sbi_op = &foofs_sbi_operations, 508 | .user_page_size = 0, 509 | .next_sb_id = 0, 510 | }; 511 | 512 | static 513 | int foofs_register_fs(int fd) 514 | { 515 | return zus_register_one(fd, &foo_zfi); 516 | } 517 | 518 | int REGISTER_FS_FN(int fd) 519 | { 520 | return foofs_register_fs(fd); 521 | } 522 | -------------------------------------------------------------------------------- /fs/toyfs/.gitignore: 
# SPDX-License-Identifier: BSD-3-Clause
#
# Makefile for the toyfs file-system via zufs
#
# Copyright (C) 2018 NetApp, Inc. All rights reserved.
#
# See module.c for LICENSE details.
#
# Authors:
#	Shachar Sharon
#

# Directory of this Makefile (with trailing slash), wherever make was started.
TOYFS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
ZDIR ?= $(TOYFS_DIR)../..
TOYMKFS := $(TOYFS_DIR)mkfs.toyfs
TOYMKFS_SRC := $(TOYFS_DIR)mkfs.c
TOYMKFS_CLEAN := mkfs.toyfs_clean
TOYMKFS_OBJS := $(TOYFS_DIR)objs/mkfs.o
TOYMKFS_CPPFLAGS := -I $(ZDIR)
# Libraries are kept apart from the compile flags so they can follow the
# source file on the link line (required with --as-needed / static libs).
TOYMKFS_LDLIBS := -L$(ZDIR) -lzus -luuid
# Combined set, kept for anything that still refers to the old variable.
TOYMKFS_FLAGS := $(TOYMKFS_CPPFLAGS) $(TOYMKFS_LDLIBS)

ZM_NAME := toyfs
ZM_OBJS := common.o super.o inode.o dir.o namei.o symlink.o file.o xattr.o mmap.o
ZM_LIBS := uuid
ZM_PRE_BUILD := mkfs.toyfs
ZM_PRE_CLEAN := $(TOYMKFS_CLEAN)

.PHONY: all clean $(TOYMKFS_CLEAN)

# CURDIR (not the inherited PWD) stays correct under `make -C fs/toyfs`.
all:
	@$(MAKE) M=$(CURDIR) -C $(ZDIR) module

clean:
	@$(MAKE) M=$(CURDIR) -C $(ZDIR) module_clean

# Rebuilt whenever mkfs.c changes; the binary lands in the directory make
# runs in, exactly as before.
mkfs.toyfs: $(TOYMKFS_SRC)
	$(CC) $(TOYMKFS_CPPFLAGS) -o $@ $< $(TOYMKFS_LDLIBS)

# Remove the binary both next to the sources and where the rule above
# actually writes it (the two differ when make runs from $(ZDIR)).
$(TOYMKFS_CLEAN):
	rm -vf $(TOYMKFS_OBJS) $(TOYMKFS) mkfs.toyfs
8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | #define _GNU_SOURCE 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "toyfs.h" 22 | 23 | void toyfs_list_init(struct toyfs_list_head *list) 24 | { 25 | list->next = list; 26 | list->prev = list; 27 | } 28 | 29 | static void _link_elem(struct toyfs_list_head *elem, 30 | struct toyfs_list_head *prev, 31 | struct toyfs_list_head *next) 32 | { 33 | next->prev = elem; 34 | elem->next = next; 35 | elem->prev = prev; 36 | prev->next = elem; 37 | } 38 | 39 | void toyfs_list_add(struct toyfs_list_head *new, 40 | struct toyfs_list_head *head) 41 | { 42 | _link_elem(new, head->prev, head); 43 | } 44 | 45 | void toyfs_list_del(struct toyfs_list_head *elem) 46 | { 47 | elem->next->prev = elem->prev; 48 | elem->prev->next = elem->next; 49 | } 50 | 51 | int toyfs_list_empty(const struct toyfs_list_head *head) 52 | { 53 | return (head->next == head); 54 | } 55 | 56 | void toyfs_list_add_tail(struct toyfs_list_head *elem, 57 | struct toyfs_list_head *head) 58 | { 59 | _link_elem(elem, head->prev, head); 60 | } 61 | 62 | void toyfs_list_add_before(struct toyfs_list_head *elem, 63 | struct toyfs_list_head *head) 64 | { 65 | _link_elem(elem, head->prev, head); 66 | } 67 | 68 | 69 | /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 70 | 71 | void toyfs_panicf(const char *file, int line, const char *fmt, ...) 
72 | { 73 | va_list ap; 74 | FILE *fp = stderr; 75 | 76 | flockfile(fp); 77 | fputs("toyfs: ", fp); 78 | va_start(ap, fmt); 79 | vfprintf(fp, fmt, ap); 80 | va_end(ap); 81 | fprintf(fp, " (%s:%d)\n", file, line); 82 | funlockfile(fp); 83 | abort(); 84 | } 85 | 86 | void toyfs_mutex_init(pthread_mutex_t *mutex) 87 | { 88 | int err, kind; 89 | pthread_mutexattr_t attr; 90 | 91 | err = pthread_mutexattr_init(&attr); 92 | toyfs_panic_if_err(err, "pthread_mutexattr_init"); 93 | 94 | kind = PTHREAD_MUTEX_RECURSIVE; /* PTHREAD_MUTEX_ERRORCHECK; */ 95 | err = pthread_mutexattr_settype(&attr, kind); 96 | toyfs_panic_if_err(err, "pthread_mutexattr_settype"); 97 | 98 | err = pthread_mutex_init(mutex, &attr); 99 | toyfs_panic_if_err(err, "pthread_mutex_init"); 100 | 101 | err = pthread_mutexattr_destroy(&attr); 102 | toyfs_panic_if_err(err, "pthread_mutexattr_destroy"); 103 | } 104 | 105 | void toyfs_mutex_destroy(pthread_mutex_t *mutex) 106 | { 107 | int err; 108 | 109 | err = pthread_mutex_destroy(mutex); 110 | toyfs_panic_if_err(err, "pthread_mutex_destroy"); 111 | } 112 | 113 | void toyfs_mutex_lock(pthread_mutex_t *mutex) 114 | { 115 | int err; 116 | 117 | err = pthread_mutex_lock(mutex); 118 | toyfs_panic_if_err(err, "pthread_mutex_lock"); 119 | } 120 | 121 | void toyfs_mutex_unlock(pthread_mutex_t *mutex) 122 | { 123 | int err; 124 | 125 | err = pthread_mutex_unlock(mutex); 126 | toyfs_panic_if_err(err, "pthread_mutex_unlock"); 127 | } 128 | 129 | struct toyfs_sb_info *toyfs_zsbi_to_sbi(struct zus_sb_info *zsbi) 130 | { 131 | return container_of(zsbi, struct toyfs_sb_info, s_zus_sbi); 132 | } 133 | 134 | struct toyfs_inode_info *toyfs_zii_to_tii(struct zus_inode_info *zii) 135 | { 136 | struct toyfs_inode_info *tii = NULL; 137 | 138 | if (zii) { 139 | toyfs_assert(zii->op != NULL); 140 | tii = container_of(zii, struct toyfs_inode_info, zii); 141 | toyfs_assert(tii->valid); 142 | } 143 | return tii; 144 | } 145 | 146 | /*. . . . . . . . . . . . . . . . . . . . . . . . . 
/*
 * Super-block level operation table of toyfs; referenced by toyfs_zfi
 * through its .sbi_op member.
 */
const struct zus_sbi_operations toyfs_sbi_op = {
	.new_inode = toyfs_new_inode,
	/*
	 * NOTE(review): free_inode shares toyfs_evict with the inode-level
	 * .evict hook -- presumably deliberate; confirm against zus core.
	 */
	.free_inode = toyfs_evict,
	.add_dentry = toyfs_add_dentry,
	.remove_dentry = toyfs_remove_dentry,
	.lookup = toyfs_lookup,
	.iget = toyfs_iget,
	.rename = toyfs_rename,
	.readdir = toyfs_readdir,
	.clone = toyfs_clone,
	.statfs = toyfs_statfs,
};
-------------------------------------------------------------------------------- /fs/toyfs/dir.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * The toyfs reference file-system implementation via zufs 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "zus.h" 21 | #include "toyfs.h" 22 | 23 | static mode_t _mode_of(const struct toyfs_inode_info *tii) 24 | { 25 | return tii->ti->i_mode; 26 | } 27 | 28 | static size_t _namelen_to_nde(const struct toyfs_dirent *de, size_t nlen) 29 | { 30 | size_t nde = 1; 31 | const size_t base = sizeof(de->d_name); 32 | const size_t desz = sizeof(*de); 33 | 34 | if (nlen > base) 35 | nde += (nlen - base + desz - 1) / desz; 36 | 37 | return nde; 38 | } 39 | 40 | static void _set_dirent(struct toyfs_dirent *dirent, 41 | const char *name, size_t nlen, 42 | const struct toyfs_inode_info *tii, loff_t off) 43 | { 44 | const size_t nde = _namelen_to_nde(NULL, nlen); 45 | 46 | memset(dirent, 0, nde * sizeof(*dirent)); 47 | memcpy(dirent->d_name, name, nlen); 48 | dirent->d_nlen = nlen; 49 | dirent->d_ino = tii->ino; 50 | dirent->d_type = IFTODT(_mode_of(tii)); 51 | dirent->d_off = off; 52 | } 53 | 54 | static bool _is_active(const struct toyfs_dirent *dirent) 55 | { 56 | return (dirent->d_nlen > 0) && (dirent->d_ino != 0); 57 | } 58 | 59 | struct toyfs_list_head *toyfs_childs_list_of(struct toyfs_inode_info *dir_tii) 60 | { 61 | struct toyfs_inode *ti = dir_tii->ti; 62 | 63 | return &ti->list_head; 64 | } 65 | 66 | static int _hasname(const struct toyfs_dirent *dirent, 67 | const struct zufs_str *str) 68 | { 69 | return (dirent->d_nlen == str->len) && 70 | !strncmp(dirent->d_name, str->name, dirent->d_nlen); 71 | } 72 | 73 
| static struct toyfs_dentries *_dentries_of(struct toyfs_list_head *head) 74 | { 75 | return container_of(head, struct toyfs_dentries, head); 76 | } 77 | 78 | static struct toyfs_dirent *_next_dirent(struct toyfs_dirent *de) 79 | { 80 | size_t step; 81 | 82 | step = _is_active(de) ? _namelen_to_nde(de, de->d_nlen) : 1; 83 | return de + step; 84 | } 85 | 86 | static size_t _count_free_de(const struct toyfs_dirent *itr, 87 | const struct toyfs_dirent *end) 88 | { 89 | size_t count = 0; 90 | 91 | while (itr < end) { 92 | if (itr->d_nlen) 93 | break; 94 | ++count; 95 | ++itr; 96 | } 97 | return count; 98 | } 99 | 100 | static struct toyfs_dirent * 101 | _search_free(struct toyfs_dentries *dentries, size_t nlen) 102 | { 103 | size_t count, required = _namelen_to_nde(NULL, nlen); 104 | struct toyfs_dirent *itr = &dentries->de[0]; 105 | struct toyfs_dirent *end = itr + ARRAY_SIZE(dentries->de); 106 | 107 | while (itr < end) { 108 | count = _count_free_de(itr, end); 109 | if (count >= required) 110 | return itr; 111 | itr += count ? 
count : _namelen_to_nde(itr, itr->d_nlen); 112 | } 113 | return NULL; 114 | } 115 | 116 | static struct toyfs_dirent * 117 | _find_dirent(struct toyfs_dentries *dentries, const struct zufs_str *str) 118 | { 119 | struct toyfs_dirent *itr = &dentries->de[0]; 120 | struct toyfs_dirent *end = itr + ARRAY_SIZE(dentries->de); 121 | 122 | while (itr < end) { 123 | if (_hasname(itr, str)) 124 | return itr; 125 | itr = _next_dirent(itr); 126 | } 127 | return NULL; 128 | } 129 | 130 | static void _reset_dirent(struct toyfs_dirent *de) 131 | { 132 | const size_t nde = _namelen_to_nde(de, de->d_nlen); 133 | 134 | toyfs_assert(de->d_nlen > 0); 135 | memset(de, 0, nde * sizeof(*de)); 136 | } 137 | 138 | struct toyfs_dirent *toyfs_lookup_dirent(struct toyfs_inode_info *dir_tii, 139 | const struct zufs_str *str) 140 | { 141 | struct toyfs_dirent *dirent; 142 | struct toyfs_list_head *childs, *itr; 143 | 144 | childs = toyfs_childs_list_of(dir_tii); 145 | itr = childs->next; 146 | while (itr != childs) { 147 | dirent = _find_dirent(_dentries_of(itr), str); 148 | if (dirent != NULL) 149 | return dirent; 150 | itr = itr->next; 151 | } 152 | return NULL; 153 | } 154 | 155 | static struct toyfs_dirent * 156 | _acquire_dirent(struct toyfs_inode_info *dir_tii, size_t nlen) 157 | { 158 | int64_t d_off = 2; 159 | struct toyfs_dirent *dirent; 160 | struct toyfs_list_head *childs, *itr; 161 | struct toyfs_pmemb *pmemb; 162 | struct toyfs_dentries *dentries; 163 | 164 | childs = toyfs_childs_list_of(dir_tii); 165 | itr = childs->next; 166 | while (itr != childs) { 167 | dentries = _dentries_of(itr); 168 | dirent = _search_free(dentries, nlen); 169 | if (dirent != NULL) { 170 | d_off += dirent - dentries->de; 171 | goto out; 172 | } 173 | itr = itr->next; 174 | d_off += (int64_t)ARRAY_SIZE(dentries->de); 175 | } 176 | 177 | pmemb = toyfs_acquire_pmemb(dir_tii->sbi); 178 | if (!pmemb) 179 | return NULL; 180 | 181 | dir_tii->ti->i_blocks += 1; 182 | 183 | dentries = (struct toyfs_dentries 
*)pmemb; 184 | toyfs_list_add_tail(&dentries->head, childs); 185 | dirent = dentries->de; 186 | 187 | out: 188 | dirent->d_off = d_off; 189 | return dirent; 190 | } 191 | 192 | static void _add_dirent(struct toyfs_inode_info *dir_tii, 193 | struct toyfs_inode_info *tii, struct zufs_str *str, 194 | struct toyfs_dirent *dirent) 195 | { 196 | _set_dirent(dirent, str->name, str->len, tii, dirent->d_off); 197 | /* Can not inc/dec by 1 because readdir will fail (it checks i_size) */ 198 | dir_tii->ti->i_size += PAGE_SIZE; 199 | zus_std_add_dentry(dir_tii->zii.zi, tii->zii.zi); 200 | } 201 | 202 | int toyfs_add_dirent(struct toyfs_inode_info *dir_tii, 203 | struct toyfs_inode_info *tii, struct zufs_str *str, 204 | struct toyfs_dirent **out_dirent) 205 | { 206 | struct toyfs_dirent *dirent; 207 | 208 | dirent = _acquire_dirent(dir_tii, str->len); 209 | if (!dirent) 210 | return -ENOSPC; 211 | 212 | _add_dirent(dir_tii, tii, str, dirent); 213 | *out_dirent = dirent; 214 | return 0; 215 | } 216 | 217 | int toyfs_add_dentry(struct zus_inode_info *dir_zii, 218 | struct zus_inode_info *zii, struct zufs_str *str) 219 | { 220 | struct toyfs_dirent *dirent; 221 | struct toyfs_inode_info *dir_tii = Z2II(dir_zii); 222 | struct toyfs_inode_info *tii = Z2II(zii); 223 | const ino_t dirino = dir_tii->ino; 224 | const ino_t ino = tii->ino; 225 | 226 | DBG("add_dentry: dirino=%lu %.*s ino=%lu mode=%o\n", 227 | dirino, str->len, str->name, ino, _mode_of(tii)); 228 | 229 | return toyfs_add_dirent(dir_tii, tii, str, &dirent); 230 | } 231 | 232 | void toyfs_remove_dirent(struct toyfs_inode_info *dir_tii, 233 | struct toyfs_inode_info *tii, 234 | struct toyfs_dirent *dirent) 235 | { 236 | _reset_dirent(dirent); 237 | dir_tii->ti->i_size -= PAGE_SIZE; 238 | zus_std_remove_dentry(dir_tii->zii.zi, tii->zii.zi); 239 | } 240 | 241 | int toyfs_remove_dentry(struct zus_inode_info *dir_zii, 242 | struct zus_inode_info *zii, struct zufs_str *str) 243 | { 244 | struct zus_inode *zi; 245 | struct 
toyfs_dirent *dirent; 246 | struct toyfs_inode_info *dir_tii = Z2II(dir_zii); 247 | struct toyfs_inode_info *tii = Z2II(zii); 248 | 249 | DBG("remove_dentry: dirino=%lu %.*s\n", 250 | dir_tii->ino, str->len, str->name); 251 | 252 | dirent = toyfs_lookup_dirent(dir_tii, str); 253 | if (!dirent) 254 | return -ENOENT; 255 | 256 | zi = tii->zii.zi; 257 | if (zi_isdir(zi) && tii->ti->i_size) 258 | return -ENOTEMPTY; 259 | 260 | DBG("remove_dentry: ino=%lu mode=%o\n", dirent->d_ino, zi->i_mode); 261 | 262 | toyfs_remove_dirent(dir_tii, tii, dirent); 263 | 264 | /* 265 | * XXX: Force free_inode by setting i_nlink to 0 266 | * TODO: Maybe in zus? Maybe in zuf? 267 | */ 268 | if (zi_isdir(zi) && (zi->i_nlink == 1) && !tii->ti->i_size) 269 | zi->i_nlink = 0; 270 | 271 | return 0; 272 | } 273 | 274 | 275 | struct toyfs_dir_context; 276 | typedef bool (*toyfs_filldir_t)(struct toyfs_dir_context *, const char *, 277 | size_t, loff_t, ino_t, mode_t); 278 | 279 | struct toyfs_dir_context { 280 | toyfs_filldir_t actor; 281 | loff_t pos; 282 | }; 283 | 284 | struct toyfs_getdents_ctx { 285 | struct toyfs_dir_context dir_ctx; 286 | struct zufs_readdir_iter rdi; 287 | struct toyfs_inode_info *dir_tii; 288 | size_t emit_count; 289 | }; 290 | 291 | static bool _filldir(struct toyfs_dir_context *dir_ctx, const char *name, 292 | size_t len, loff_t pos, ino_t ino, mode_t dt) 293 | { 294 | bool status; 295 | struct toyfs_getdents_ctx *ctx = 296 | container_of(dir_ctx, struct toyfs_getdents_ctx, dir_ctx); 297 | 298 | status = zufs_zde_emit(&ctx->rdi, ino, (uint8_t)dt, 299 | (uint64_t)pos, name, (uint8_t)len); 300 | if (status) 301 | ctx->emit_count++; 302 | DBG("filldir: %.*s ino=%ld dt=%d emit_count=%d status=%d\n", 303 | (int)len, name, ino, dt, (int)ctx->emit_count, (int)status); 304 | return status; 305 | } 306 | 307 | static void _init_getdents_ctx(struct toyfs_getdents_ctx *ctx, 308 | struct toyfs_inode_info *dir_tii, 309 | struct zufs_ioc_readdir *ioc_readdir, 310 | void *app_ptr) 
311 | { 312 | zufs_readdir_iter_init(&ctx->rdi, ioc_readdir, app_ptr); 313 | ctx->dir_ctx.actor = _filldir; 314 | ctx->dir_ctx.pos = ioc_readdir->pos; 315 | ctx->dir_tii = dir_tii; 316 | ctx->emit_count = 0; 317 | } 318 | 319 | static bool _emit(struct toyfs_dir_context *ctx, const char *name, 320 | size_t namelen, ino_t ino, mode_t type) 321 | { 322 | return ctx->actor(ctx, name, namelen, ctx->pos, ino, type); 323 | } 324 | 325 | static bool _emit_dirent(struct toyfs_dir_context *ctx, 326 | const struct toyfs_dirent *dirent) 327 | { 328 | bool ok; 329 | 330 | ok = _emit(ctx, dirent->d_name, dirent->d_nlen, 331 | dirent->d_ino, dirent->d_type); 332 | if (ok) 333 | ctx->pos = (dirent->d_off + 1); 334 | return ok; 335 | } 336 | 337 | static bool _iterate_dentries(struct toyfs_dentries *dentries, 338 | struct toyfs_dir_context *ctx) 339 | { 340 | bool ok = true; 341 | struct toyfs_dirent *itr = &dentries->de[0]; 342 | struct toyfs_dirent *end = itr + ARRAY_SIZE(dentries->de); 343 | 344 | while ((itr < end) && ok) { 345 | if (itr->d_nlen && (itr->d_off >= ctx->pos)) 346 | ok = _emit_dirent(ctx, itr); 347 | itr = _next_dirent(itr); 348 | } 349 | return ok; 350 | } 351 | 352 | static bool _iterate_dir(struct toyfs_inode_info *dir_tii, 353 | struct toyfs_dir_context *ctx) 354 | { 355 | bool ok = true; 356 | struct toyfs_list_head *itr, *childs; 357 | struct toyfs_inode *dir_ti = dir_tii->ti; 358 | 359 | if (ctx->pos == 0) { 360 | ok = _emit(ctx, ".", 1, dir_ti->i_ino, DT_DIR); 361 | ctx->pos = 1; 362 | } 363 | if ((ctx->pos == 1) && ok) { 364 | ok = _emit(ctx, "..", 2, dir_ti->i_dir.parent, DT_DIR); 365 | ctx->pos = 2; 366 | } 367 | childs = toyfs_childs_list_of(dir_tii); 368 | itr = childs->next; 369 | while (ok && (itr != childs)) { 370 | ok = _iterate_dentries(_dentries_of(itr), ctx); 371 | if (ok) 372 | itr = itr->next; 373 | } 374 | return (itr != childs); 375 | } 376 | 377 | int toyfs_iterate_dir(struct toyfs_inode_info *dir_tii, 378 | struct zufs_ioc_readdir *zir, 
void *buf) 379 | { 380 | struct toyfs_getdents_ctx ctx; 381 | 382 | _init_getdents_ctx(&ctx, dir_tii, zir, buf); 383 | zir->more = _iterate_dir(dir_tii, &ctx.dir_ctx); 384 | zir->pos = ctx.dir_ctx.pos; 385 | DBG("iterate_dir: dir-ino=%lu emit_count=%lu more=%d pos=%ld\n", 386 | dir_tii->ino, ctx.emit_count, (int)zir->more, (long)zir->pos); 387 | return 0; 388 | } 389 | 390 | int toyfs_readdir(void *app_ptr, struct zufs_ioc_readdir *zir) 391 | { 392 | return toyfs_iterate_dir(Z2II(zir->dir_ii), zir, app_ptr); 393 | } 394 | 395 | void toyfs_release_dir(struct toyfs_inode_info *dir_tii) 396 | { 397 | struct toyfs_list_head *itr, *next, *childs; 398 | struct toyfs_dentries *dentries; 399 | struct toyfs_pmemb *pmemb; 400 | 401 | childs = toyfs_childs_list_of(dir_tii); 402 | itr = childs->next; 403 | while (itr != childs) { 404 | toyfs_assert(dir_tii->ti->i_blocks > 0); 405 | 406 | dentries = _dentries_of(itr); 407 | next = itr->next; 408 | pmemb = (struct toyfs_pmemb *)dentries; 409 | 410 | toyfs_list_del(itr); 411 | toyfs_release_pmemb(dir_tii->sbi, pmemb); 412 | 413 | dir_tii->ti->i_blocks -= 1; 414 | itr = next; 415 | } 416 | 417 | toyfs_assert(dir_tii->ti->i_blocks == 0); 418 | } 419 | -------------------------------------------------------------------------------- /fs/toyfs/inode.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * The toyfs reference file-system implementation via zufs 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 
/*
 * toyfs_new_inode - create a new inode from the template in @ioc_new->zi.
 *
 * @zsbi:    super-block the inode belongs to.
 * @app_ptr: read as the symlink target when the target does not fit into
 *           the inode's i_symlink field; unused otherwise.
 * @ioc_new: request; supplies the inode template, the parent directory
 *           (dir_ii) and the creation flags.
 *
 * Returns the new inode-info with one reference taken, or NULL when the
 * mode is unsupported, i_size is PAGE_SIZE or more, or an allocation fails.
 */
struct zus_inode_info *
toyfs_new_inode(struct zus_sb_info *zsbi,
		void *app_ptr, struct zufs_ioc_new_inode *ioc_new)
{
	ino_t ino;
	mode_t mode;
	size_t symlen;
	struct toyfs_inode *ti;
	struct toyfs_sb_info *sbi = Z2SBI(zsbi);
	struct toyfs_inode_info *tii = NULL;
	struct zus_inode *zi = &ioc_new->zi;
	struct toyfs_inode_info *dir_tii = Z2II(ioc_new->dir_ii);
	struct toyfs_pmemb *pmemb;
	bool symlong;
	const char *symname = (const char *)app_ptr;
	struct zus_inode_info *zii;

	zii = toyfs_zii_alloc(zsbi);
	if (!zii)
		return NULL;

	tii = Z2II(zii);
	mode = zi->i_mode;
	DBG("new_inode:sbi=%p tii=%p mode=%o\n",
	    (void *)sbi, (void *)tii, mode);

	/* Validate the template before any persistent inode is acquired */
	if (!issupported(zi))
		goto out_err;
	if (zi->i_size >= PAGE_SIZE)
		goto out_err;

	ti = toyfs_acquire_inode(sbi);
	if (!ti)
		goto out_err;

	/* Seed the inode from the template, then stamp the fresh number */
	ino = _next_ino(tii->sbi);
	memset(ti, 0, sizeof(*ti));
	memcpy(ti, zi, sizeof(*ti));
	tii->ti = ti;
	tii->ino = ino;
	tii->zii.zi = toyfs_ti2zi(tii->ti);
	ti->i_ino = ino;

	if (zi_isdir(zi)) {
		DBG("new_inode(dir): ino=%lu\n", ino);
		toyfs_list_init(toyfs_childs_list_of(tii));
		/* A new directory starts without entries */
		ti->i_size = 0;
		ti->i_dir.parent = dir_tii->ti->i_ino;
		zus_std_new_dir(dir_tii->zii.zi, toyfs_ti2zi(ti));
	} else if (zi_isreg(zi)) {
		DBG("new_inode(reg): ino=%lu\n", ino);
		toyfs_list_init(toyfs_iblkrefs_list_of(tii));
		if (ioc_new->flags & ZI_TMPFILE)
			ti->i_nlink = 1;
	} else if (zi_islnk(zi)) {
		/* For a symlink, i_size is the length of the target string */
		symlen = ti->i_size;
		symlong = symlen >= sizeof(ti->i_symlink);
		symname = symlong ? (const char *)symname :
				(const char *)zi->i_symlink;
		DBG("new_inode(symlnk): ino=%lu lnk=%.*s\n",
		    ino, (int)symlen, symname);
		if (symlong) {
			/* Long target: stored in a block of its own */
			pmemb = toyfs_acquire_pmemb(sbi);
			if (!pmemb) {
				toyfs_release_inode(sbi, ti);
				goto out_err;
			}
			memcpy(pmemb->dat, symname, symlen);
			tii->ti->i_sym_dpp = toyfs_page2dpp(sbi, pmemb);
		}
	} else
		DBG("new_inode: ino=%lu mode=%o\n", ino, mode);

	/* Track the inode and take the caller's reference under the lock */
	toyfs_lock_inodes(sbi);
	toyfs_i_track(tii);
	tii->ref++;
	toyfs_unlock_inodes(sbi);

	return zii;

out_err:
	if (tii)
		toyfs_tii_free(tii);
	return NULL;
}
*/ 144 | } else if (zi_islnk(zi)) { 145 | DBG("free_inode(symlink): ino=%lu \n", tii->ino); 146 | toyfs_release_symlink(tii); 147 | } else if (zi_isreg(zi)) { 148 | DBG("free_inode(reg): ino=%lu\n", tii->ino); 149 | toyfs_truncate(tii, 0); 150 | } else { 151 | DBG("free_inode: ino=%lu mode=%o\n", tii->ino, zi->i_mode); 152 | zi->i_rdev = 0; 153 | } 154 | toyfs_drop_xattr(tii); 155 | toyfs_release_inode(sbi, ti); 156 | } 157 | 158 | int toyfs_iget(struct zus_sb_info *zsbi, ulong ino, struct zus_inode_info **zii) 159 | { 160 | int err = 0; 161 | struct toyfs_sb_info *sbi = Z2SBI(zsbi); 162 | struct toyfs_inode_info *tii; 163 | struct toyfs_inode_ref *tir; 164 | 165 | DBG("iget: ino=%lu\n", ino); 166 | 167 | tir = toyfs_find_inode_ref_by_ino(sbi, ino); 168 | if (!tir) { 169 | *zii = NULL; 170 | DBG("iget: ino=%lu => -ENOENT\n", ino); 171 | return -ENOENT; 172 | } 173 | 174 | toyfs_lock_inodes(sbi); 175 | tii = tir->tii; 176 | if (!tir->tii) { 177 | tii = toyfs_alloc_ii(sbi); 178 | if (unlikely(!tii)) { 179 | DBG("iget: ino=%lu => ENOMEM\n", ino); 180 | err = -ENOMEM; 181 | goto out; 182 | } 183 | tir->tii = tii; 184 | tii->mapped = true; 185 | } 186 | ++tii->ref; 187 | 188 | *zii = &tii->zii; 189 | DBG("iget: ino=%lu zi=%p\n", ino, (void *)tii->zii.zi); 190 | out: 191 | toyfs_unlock_inodes(sbi); 192 | return err; 193 | } 194 | 195 | void toyfs_evict(struct zus_inode_info *zii) 196 | { 197 | struct toyfs_inode_info *tii = Z2II(zii); 198 | struct toyfs_sb_info *sbi = tii->sbi; 199 | struct toyfs_inode *ti = tii->ti; 200 | 201 | DBG("evict: ino=%lu\n", tii->ino); 202 | 203 | toyfs_lock_inodes(sbi); 204 | if (--tii->ref) 205 | goto out; 206 | 207 | toyfs_sbi_lock(tii->sbi); 208 | if (!ti->i_nlink) { 209 | toyfs_free_inode(tii); 210 | if (tii->mapped) 211 | toyfs_i_untrack(tii, true); 212 | } else { 213 | if (tii->mapped) 214 | toyfs_i_untrack(tii, false); 215 | } 216 | 217 | toyfs_tii_free(tii); 218 | toyfs_sbi_unlock(sbi); 219 | 220 | out: 221 | toyfs_unlock_inodes(sbi); 
222 | } 223 | 224 | static int _setattr(struct toyfs_inode_info *tii, uint enable_bits) 225 | { 226 | int err = 0; 227 | struct zus_inode *zi = tii->zii.zi; 228 | 229 | 230 | DBG("setattr: ino=%lu enable_bits=%x \n", tii->ino, enable_bits); 231 | 232 | /* TODO: CL-FLUSH */ 233 | if (enable_bits & STATX_MODE) 234 | DBG("setattr: mode=%o\n", zi->i_mode); 235 | if (enable_bits & STATX_NLINK) 236 | DBG("setattr: nlink=%o\n", zi->i_nlink); 237 | if (enable_bits & (STATX_UID | STATX_GID)) 238 | DBG("setattr: uid=%u gid=%u\n", zi->i_uid, zi->i_gid); 239 | if (enable_bits & (STATX_ATIME | STATX_MTIME | STATX_CTIME)) 240 | DBG("setattr: atime=%lu mtime=%lu ctime=%lu\n", 241 | (uint64_t)zi->i_atime, 242 | (uint64_t)zi->i_mtime, 243 | (uint64_t)zi->i_ctime); 244 | return err; 245 | } 246 | 247 | int toyfs_setattr(struct zus_inode_info *zii, uint enable_bits) 248 | { 249 | return _setattr(Z2II(zii), enable_bits); 250 | } 251 | 252 | -------------------------------------------------------------------------------- /fs/toyfs/mkfs.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * Minimal mkfs utility for the toyfs file-system 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 
/*
 * toyfs_open_blkdev - open the device or image file to be formatted.
 *
 * @path: block device or regular file.
 * @sz:   out; size of the device/file in bytes.
 *
 * Any failure (open, fstat, wrong file type, size ioctl, size below 1 MiB)
 * terminates the program through error(3).
 *
 * Returns the open read/write file descriptor.
 */
static int toyfs_open_blkdev(const char *path, loff_t *sz)
{
	int fd, err;
	size_t bdev_size = 0, min_size = 1UL << 20;
	struct stat st;

	fd = open(path, O_RDWR);
	/* 0 is a valid descriptor; only a negative value means failure */
	if (fd < 0)
		error(EXIT_FAILURE, errno, "open failed: %s", path);

	err = fstat(fd, &st);
	if (err)
		error(EXIT_FAILURE, errno, "fstat failed: %s", path);

	/* No errno is involved here, so no error string is requested */
	if (!S_ISBLK(st.st_mode) && !S_ISREG(st.st_mode))
		error(EXIT_FAILURE, 0, "not block or regualr file: %s", path);

	if (S_ISBLK(st.st_mode)) {
		err = ioctl(fd, BLKGETSIZE64, &bdev_size);
		/* ioctl() returns -1; the reason is in errno */
		if (err)
			error(EXIT_FAILURE, errno,
			      "ioctl(BLKGETSIZE64) failed: %s", path);
		if (bdev_size < min_size)
			error(EXIT_FAILURE, 0,
			      "illegal device size: %s %lu", path, bdev_size);
		*sz = (loff_t)bdev_size;
	} else {
		if (st.st_size < (loff_t)min_size)
			error(EXIT_FAILURE, 0,
			      "illegal size: %s %ld", path, st.st_size);
		*sz = st.st_size;
	}
	printf("open device: %s size=%ld fd=%d\n", path, *sz, fd);
	return fd;
}
0, sizeof(*dev_table)); 89 | memcpy(&dev_table->s_uuid, super_uuid, sizeof(dev_table->s_uuid)); 90 | dev_table->s_version = (TOYFS_MAJOR_VERSION * ZUFS_MINORS_PER_MAJOR) + 91 | TOYFS_MINOR_VERSION; 92 | dev_table->s_magic = TOYFS_SUPER_MAGIC; 93 | dev_table->s_flags = 0; 94 | dev_table->s_t1_blocks = md_o2p(dev_size & ~align_mask); 95 | dev_table->s_dev_list.id_index = 0; 96 | dev_table->s_dev_list.t1_count = 1; 97 | 98 | dev_id = &dev_table->s_dev_list.dev_ids[0]; 99 | memcpy(&dev_id->uuid, dev_uuid, sizeof(dev_id->uuid)); 100 | dev_id->blocks = dev_table->s_t1_blocks; 101 | printf("device: uuid=%s blocks=%lu\n", uu, (size_t)dev_id->blocks); 102 | 103 | clock_gettime(CLOCK_REALTIME, &now); 104 | timespec_to_zt(&dev_table->s_wtime, &now); 105 | dev_table->s_sum = md_calc_csum(dev_table); 106 | } 107 | 108 | static void 109 | toyfs_write_super_block(int fd, struct toyfs_super_block *super_block) 110 | { 111 | int err; 112 | loff_t off; 113 | 114 | off = lseek(fd, 0, SEEK_SET); 115 | if (off != 0) 116 | error(EXIT_FAILURE, -errno, 117 | "failed to lseek to offset=%ld", off); 118 | 119 | err = write(fd, super_block, sizeof(*super_block)); 120 | if (err != (int)sizeof(*super_block)) 121 | error(EXIT_FAILURE, -errno, "failed to write super block"); 122 | 123 | err = fsync(fd); 124 | if (err) 125 | error(EXIT_FAILURE, -errno, "failed to fsync"); 126 | } 127 | 128 | static void toyfs_fill_root_inode(struct toyfs_inode *rooti) 129 | { 130 | memset(rooti, 0, sizeof(*rooti)); 131 | 132 | rooti->i_ino = TOYFS_ROOT_INO; 133 | rooti->i_nlink = 2; 134 | rooti->i_size = 0; 135 | } 136 | 137 | static void toyfs_write_root_inode(int fd, struct toyfs_inode *rooti) 138 | { 139 | int err; 140 | loff_t off; 141 | 142 | off = lseek(fd, PAGE_SIZE, SEEK_SET); 143 | if (off != PAGE_SIZE) 144 | error(EXIT_FAILURE, -errno, 145 | "failed to lseek to offset=%ld", off); 146 | 147 | err = write(fd, rooti, sizeof(*rooti)); 148 | if (err != (int)sizeof(*rooti)) 149 | error(EXIT_FAILURE, -errno, 
"failed to write root inode"); 150 | 151 | err = fsync(fd); 152 | if (err) 153 | error(EXIT_FAILURE, -errno, "failed to fsync"); 154 | } 155 | 156 | 157 | static struct toyfs_super_block g_super_block; 158 | static struct toyfs_inode g_root_inode; 159 | 160 | int main(int argc, char *argv[]) 161 | { 162 | int fd; 163 | loff_t dev_size = 0; 164 | struct toyfs_super_block *sb = &g_super_block; 165 | struct toyfs_inode *rooti = &g_root_inode; 166 | 167 | if (argc != 3) 168 | error(EXIT_FAILURE, -1, "usage: mkfs "); 169 | 170 | fd = toyfs_open_blkdev(argv[2], &dev_size); 171 | toyfs_fill_dev_table(&sb->head.dev_table, dev_size, argv[1]); 172 | toyfs_fill_root_inode(rooti); 173 | toyfs_write_super_block(fd, sb); 174 | toyfs_write_root_inode(fd, rooti); 175 | toyfs_close_blkdev(argv[1], fd); 176 | return 0; 177 | } 178 | -------------------------------------------------------------------------------- /fs/toyfs/mmap.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * The toyfs reference file-system implementation via zufs 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "zus.h" 25 | #include "iom_enc.h" 26 | #include "toyfs.h" 27 | 28 | #define GB_WRITE 1 29 | 30 | 31 | static uint64_t _resolve_bn(const struct toyfs_inode_info *tii, 32 | struct toyfs_pmemb *pmemb) 33 | { 34 | return pmemb ? 
toyfs_addr2bn(tii->sbi, (void *)pmemb) : 0; 35 | } 36 | 37 | static int _get_block_rd(struct toyfs_inode_info *tii, loff_t off, 38 | struct zufs_ioc_IO *get_block) 39 | { 40 | struct toyfs_pmemb *pmemb; 41 | struct zus_iomap_build iomb = {}; 42 | 43 | _zus_iom_init_4_ioc_io(&iomb, &tii->sbi->s_zus_sbi, 44 | get_block, ZUS_MAX_OP_SIZE); 45 | pmemb = toyfs_resolve_pmemb(tii, off); 46 | 47 | _zus_iom_start(&iomb, NULL, NULL); 48 | _ziom_enc_t1_bn(&iomb, _resolve_bn(tii, pmemb), 0); 49 | _zus_iom_end(&iomb); 50 | get_block->ret_flags = 0; 51 | get_block->hdr.out_len = _ioc_IO_size(1); 52 | 53 | return 0; 54 | } 55 | 56 | static int _get_block_wr(struct toyfs_inode_info *tii, loff_t off, 57 | struct zufs_ioc_IO *get_block) 58 | { 59 | uint64_t pmem_bn; 60 | struct toyfs_pmemb *pmemb; 61 | struct zus_iomap_build iomb = {}; 62 | 63 | _zus_iom_init_4_ioc_io(&iomb, &tii->sbi->s_zus_sbi, 64 | get_block, ZUS_MAX_OP_SIZE); 65 | 66 | pmemb = toyfs_resolve_pmemb(tii, off); 67 | if (pmemb) { 68 | _zus_iom_start(&iomb, NULL, NULL); 69 | _ziom_enc_t1_bn(&iomb, _resolve_bn(tii, pmemb), 0); 70 | _zus_iom_end(&iomb); 71 | get_block->ret_flags = 0; 72 | get_block->hdr.out_len = _ioc_IO_size(1); 73 | 74 | return 0; 75 | } 76 | pmem_bn = toyfs_require_pmem_bn(tii, off); 77 | if (pmem_bn) { 78 | _zus_iom_start(&iomb, NULL, NULL); 79 | _ziom_enc_t1_bn(&iomb, pmem_bn, 0); 80 | _zus_iom_end(&iomb); 81 | get_block->ret_flags = ZUFS_RET_NEW; 82 | get_block->hdr.out_len = _ioc_IO_size(1); 83 | 84 | return 0; 85 | } 86 | return -ENOSPC; 87 | } 88 | 89 | static int _get_multy(struct zus_inode_info *zii, struct zufs_ioc_IO *io) 90 | { 91 | int err; 92 | const loff_t off = (loff_t)io->filepos; 93 | struct toyfs_inode_info *tii = Z2II(zii); 94 | 95 | if (!zi_isreg(tii->zii.zi)) 96 | return -ENOTSUP; 97 | 98 | if (!(io->rw & ZUFS_RW_MMAP)) 99 | return -ENOTSUP; 100 | 101 | if (io->rw & GB_WRITE) 102 | err = _get_block_wr(tii, off, io); 103 | else 104 | err = _get_block_rd(tii, off, io); 105 | 106 | 
DBG("get_block: ino=%ld off=%ld err=%d\n", 107 | (long)tii->ino, (long)io->filepos, err); 108 | 109 | return err; 110 | } 111 | 112 | static int _put_multy(struct zus_inode_info *zii, struct zufs_ioc_IO *io) 113 | { 114 | struct toyfs_inode_info *tii = Z2II(zii); 115 | 116 | DBG("put_block: ino=%ld off=%ld\n", 117 | (long)tii->ino, (long)io->filepos); 118 | 119 | if (!(io->rw & ZUFS_RW_MMAP)) 120 | return -ENOTSUP; 121 | 122 | return 0; 123 | } 124 | 125 | int toyfs_get_put_multy(struct zus_inode_info *zii, 126 | struct zufs_ioc_IO *io) 127 | { 128 | if (io->hdr.operation == ZUFS_OP_GET_MULTY) 129 | return _get_multy(zii, io); 130 | 131 | return _put_multy(zii, io); 132 | } 133 | 134 | int toyfs_mmap_close(struct zus_inode_info *zii, 135 | struct zufs_ioc_mmap_close *mmap_close) 136 | { 137 | struct toyfs_inode_info *tii = Z2II(zii); 138 | 139 | DBG("mmap_close: ino=%ld rw=%lx\n", 140 | (long)tii->ino, (long)mmap_close->rw); 141 | return 0; 142 | } 143 | -------------------------------------------------------------------------------- /fs/toyfs/namei.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * The toyfs reference file-system implementation via zufs 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "zus.h" 24 | #include "toyfs.h" 25 | 26 | 27 | static ino_t _lookup(struct toyfs_inode_info *dir_tii, struct zufs_str *str) 28 | { 29 | struct toyfs_dirent *dirent; 30 | 31 | DBG("lookup: dirino=%lu %.*s\n", 32 | dir_tii->ino, str->len, str->name); 33 | 34 | dirent = toyfs_lookup_dirent(dir_tii, str); 35 | return dirent ? 
dirent->d_ino : TOYFS_NULL_INO; 36 | } 37 | 38 | ulong toyfs_lookup(struct zus_inode_info *dir_zii, struct zufs_str *str) 39 | { 40 | return _lookup(Z2II(dir_zii), str); 41 | } 42 | 43 | static int _do_rename(struct toyfs_inode_info *old_dir_ii, 44 | struct toyfs_inode_info *new_dir_ii, 45 | struct toyfs_inode_info *old_ii, 46 | struct toyfs_inode_info *new_ii, 47 | struct zufs_str *old_name, 48 | struct zufs_str *new_name, 49 | uint64_t time, uint flags) 50 | { 51 | int err; 52 | struct toyfs_dirent *old_de, *new_de; 53 | 54 | DBG("rename: olddir_ino=%lu newdir_ino=%lu " 55 | "old_name=%.*s new_name=%.*s time=%lu\n", 56 | old_dir_ii->ino, new_dir_ii->ino, 57 | old_name->len, old_name->name, 58 | new_name->len, new_name->name, time); 59 | 60 | if (!old_ii) 61 | return -EINVAL; 62 | 63 | if (flags) /* RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT */ 64 | return -ENOTSUP; 65 | 66 | old_de = toyfs_lookup_dirent(old_dir_ii, old_name); 67 | if (unlikely(!old_de)) 68 | return -ENOENT; 69 | 70 | new_de = toyfs_lookup_dirent(new_dir_ii, new_name); 71 | if (!new_de) { 72 | err = toyfs_add_dirent(new_dir_ii, old_ii, new_name, &new_de); 73 | if (err) 74 | return err; 75 | } 76 | toyfs_remove_dirent(old_dir_ii, old_ii, old_de); 77 | 78 | if (S_ISDIR(old_ii->ti->i_mode)) 79 | old_ii->ti->i_nlink += 1; 80 | 81 | old_dir_ii->ti->i_mtime = time; 82 | old_dir_ii->ti->i_ctime = time; 83 | new_dir_ii->ti->i_mtime = time; 84 | new_dir_ii->ti->i_ctime = time; 85 | old_ii->ti->i_ctime = time; 86 | 87 | return 0; 88 | } 89 | 90 | int toyfs_rename(struct zufs_ioc_rename *zir) 91 | { 92 | int err; 93 | struct toyfs_inode_info *old_dir_ii = Z2II(zir->old_dir_ii); 94 | struct toyfs_inode_info *new_dir_ii = Z2II(zir->new_dir_ii); 95 | struct toyfs_inode_info *old_ii = Z2II(zir->old_zus_ii); 96 | struct toyfs_inode_info *new_ii = Z2II(zir->new_zus_ii); 97 | struct zufs_str *old_name = &zir->old_d_str; 98 | struct zufs_str *new_name = &zir->new_d_str; 99 | 100 | if (!old_ii) 101 | return 
-EINVAL; 102 | 103 | err = _do_rename(old_dir_ii, new_dir_ii, 104 | old_ii, new_ii, old_name, new_name, 105 | zir->time, zir->flags); 106 | return err; 107 | } 108 | -------------------------------------------------------------------------------- /fs/toyfs/symlink.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * The toyfs reference file-system implementation via zufs 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "zus.h" 22 | #include "toyfs.h" 23 | 24 | 25 | static int _get_symlink(struct toyfs_inode_info *tii, void **symlink) 26 | { 27 | struct toyfs_inode *ti = tii->ti; 28 | struct toyfs_pmemb *pmemb; 29 | 30 | DBG("get_symlink: ino=%lu\n", tii->ino); 31 | 32 | if (!zi_islnk(toyfs_ti2zi(ti))) 33 | return -EINVAL; 34 | 35 | if (ti->i_size < sizeof(ti->i_symlink)) { 36 | *symlink = ti->i_symlink; 37 | return 0; 38 | } 39 | 40 | pmemb = toyfs_dpp2pmemb(tii->sbi, ti->i_sym_dpp); 41 | *symlink = pmemb; 42 | 43 | return 0; 44 | } 45 | 46 | int toyfs_get_symlink(struct zus_inode_info *zii, void **symlink) 47 | { 48 | return _get_symlink(Z2II(zii), symlink); 49 | } 50 | 51 | void toyfs_release_symlink(struct toyfs_inode_info *tii) 52 | { 53 | struct toyfs_inode *ti = tii->ti; 54 | const size_t symlen = ti->i_size; 55 | struct toyfs_pmemb *pmemb; 56 | 57 | if (symlen > sizeof(ti->i_symlink)) { 58 | pmemb = toyfs_dpp2pmemb(tii->sbi, ti->i_sym_dpp); 59 | toyfs_release_pmemb(tii->sbi, pmemb); 60 | } 61 | ti->i_size = 0; 62 | ti->i_sym_dpp = 0; 63 | } 64 | -------------------------------------------------------------------------------- /fs/toyfs/toyfs.h: -------------------------------------------------------------------------------- 1 | 
/* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * toyfs.h - The toyfs reference file-system implementation via zufs 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | #ifndef TOYFS_H_ 13 | #define TOYFS_H_ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "zus.h" 27 | 28 | #ifndef ARRAY_SIZE 29 | #define ARRAY_SIZE(x_) (sizeof(x_) / sizeof(x_[0])) 30 | #endif 31 | #ifndef MAKESTR 32 | #define MAKESTR(x_) #x_ 33 | #endif 34 | #ifndef STR 35 | #define STR(x_) MAKESTR(x_) 36 | #endif 37 | #ifndef container_of 38 | #define container_of(ptr, type, member) \ 39 | (type *)((void *)((char *)ptr - offsetof(type, member))) 40 | #endif 41 | 42 | #define TOYFS_STATICASSERT(expr) _Static_assert(expr, #expr) 43 | #define TOYFS_STATICASSERT_EQ(a, b) TOYFS_STATICASSERT(a == b) 44 | #define TOYFS_BUILD_BUG_ON(expr) TOYFS_STATICASSERT(expr) 45 | 46 | #define toyfs_panic(fmt, ...) 
\ 47 | toyfs_panicf(__FILE__, __LINE__, fmt, __VA_ARGS__) 48 | #define toyfs_panic_if_err(err, msg) \ 49 | do { if (err) toyfs_panic("%s: %d", msg, err); } while (0) 50 | #define toyfs_assert(cond) \ 51 | do { if (!(cond)) toyfs_panic("assert failed: %s", #cond); } while (0) 52 | 53 | 54 | #define TOYFS_NULL_INO (0) 55 | #define TOYFS_ROOT_INO (1) 56 | #define TOYFS_MAJOR_VERSION (14) 57 | #define TOYFS_MINOR_VERSION (1) 58 | #define TOYFS_SUPER_MAGIC (0x5346314d) 59 | 60 | #define Z2SBI(zsbi) toyfs_zsbi_to_sbi(zsbi) 61 | #define Z2II(zii) toyfs_zii_to_tii(zii) 62 | 63 | struct toyfs_list_head { 64 | struct toyfs_list_head *next; 65 | struct toyfs_list_head *prev; 66 | }; 67 | 68 | void toyfs_list_init(struct toyfs_list_head *list); 69 | 70 | void toyfs_list_add(struct toyfs_list_head *elem, struct toyfs_list_head *head); 71 | 72 | void toyfs_list_del(struct toyfs_list_head *entry); 73 | 74 | int toyfs_list_empty(const struct toyfs_list_head *head); 75 | 76 | void toyfs_list_add_tail(struct toyfs_list_head *elem, 77 | struct toyfs_list_head *head); 78 | 79 | void toyfs_list_add_before(struct toyfs_list_head *elem, 80 | struct toyfs_list_head *head); 81 | 82 | 83 | /* "raw" 4k pmem page/block */ 84 | struct toyfs_pmemb { 85 | uint8_t dat[PAGE_SIZE]; 86 | }; 87 | 88 | struct toyfs_pool { 89 | pthread_mutex_t mutex; 90 | union toyfs_pool_pmemb *pages; 91 | struct toyfs_list_head free_dblkrefs; 92 | struct toyfs_list_head free_iblkrefs; 93 | struct toyfs_list_head free_inodes; 94 | void *mem; 95 | size_t msz; 96 | }; 97 | 98 | struct toyfs_inode_ref { 99 | struct toyfs_inode_ref *next; 100 | struct toyfs_inode_info *tii; 101 | struct toyfs_inode *ti; 102 | ino_t ino; 103 | }; 104 | 105 | struct toyfs_itable { 106 | pthread_mutex_t mutex; 107 | size_t icount; 108 | struct toyfs_inode_ref *imap[33377]; /* TODO: Variable size */ 109 | }; 110 | 111 | union toyfs_super_block_head { 112 | struct md_dev_table dev_table; 113 | uint8_t reserved[MDT_SIZE]; 114 | }; 115 | 116 | 
117 | struct toyfs_super_block { 118 | union toyfs_super_block_head head; 119 | }; 120 | 121 | struct toyfs_sb_info { 122 | struct zus_sb_info s_zus_sbi; 123 | struct statvfs s_statvfs; 124 | pthread_mutex_t s_mutex; 125 | pthread_mutex_t s_inodes_lock; 126 | struct toyfs_pool s_pool; 127 | struct toyfs_itable s_itable; 128 | struct toyfs_inode_info *s_root; 129 | ino_t s_top_ino; 130 | }; 131 | 132 | struct toyfs_inode { 133 | uint16_t i_flags; 134 | uint16_t i_mode; 135 | uint32_t i_nlink; 136 | uint64_t i_size; 137 | struct toyfs_list_head list_head; 138 | uint64_t i_blocks; 139 | uint64_t i_mtime; 140 | uint64_t i_ctime; 141 | uint64_t i_atime; 142 | uint64_t i_ino; 143 | uint32_t i_uid; 144 | uint32_t i_gid; 145 | uint64_t i_xattr; 146 | uint64_t i_generation; 147 | union { 148 | uint32_t i_rdev; 149 | uint8_t i_symlink[32]; 150 | uint64_t i_sym_dpp; 151 | struct _t_dir { 152 | uint64_t reserved; 153 | uint64_t parent; 154 | } i_dir; 155 | }; 156 | }; 157 | 158 | struct toyfs_inode_info { 159 | struct zus_inode_info zii; 160 | struct toyfs_sb_info *sbi; 161 | struct toyfs_inode *ti; 162 | ino_t ino; 163 | unsigned long imagic; 164 | int ref; 165 | bool mapped; 166 | bool valid; 167 | }; 168 | 169 | struct toyfs_dirent { 170 | int64_t d_off; 171 | uint64_t d_ino; 172 | uint8_t d_type; 173 | uint8_t d_nlen; 174 | char d_name[14]; 175 | }; 176 | 177 | struct toyfs_dentries { 178 | struct toyfs_list_head head; 179 | uint8_t reserved[16]; 180 | struct toyfs_dirent de[127]; 181 | }; 182 | 183 | struct toyfs_dblkref { 184 | struct toyfs_list_head head; 185 | size_t refcnt; 186 | size_t bn; 187 | }; 188 | 189 | struct toyfs_iblkref { 190 | struct toyfs_list_head head; 191 | struct toyfs_dblkref *dblkref; 192 | loff_t off; 193 | }; 194 | 195 | struct toyfs_xattr_entry { 196 | uint16_t value_size; 197 | uint8_t name_len; 198 | uint8_t data[1]; 199 | }; 200 | 201 | struct toyfs_xattr { 202 | struct toyfs_xattr_entry xe[1024]; 203 | }; 204 | 205 | /* super.c */ 206 | void 
toyfs_check_types(void); 207 | struct zus_inode *toyfs_ti2zi(struct toyfs_inode *ti); 208 | void toyfs_sbi_lock(struct toyfs_sb_info *sbi); 209 | void toyfs_sbi_unlock(struct toyfs_sb_info *sbi); 210 | void toyfs_lock_inodes(struct toyfs_sb_info *sbi); 211 | void toyfs_unlock_inodes(struct toyfs_sb_info *sbi); 212 | int toyfs_sbi_init(struct zus_sb_info *zsbi, struct zufs_mount_info *zim); 213 | int toyfs_sbi_fini(struct zus_sb_info *zsbi); 214 | struct zus_sb_info *toyfs_sbi_alloc(struct zus_fs_info *zfi); 215 | void toyfs_sbi_free(struct zus_sb_info *zsbi); 216 | size_t toyfs_addr2bn(struct toyfs_sb_info *sbi, void *ptr); 217 | void *toyfs_bn2addr(struct toyfs_sb_info *sbi, size_t bn); 218 | struct toyfs_pmemb *toyfs_bn2pmemb(struct toyfs_sb_info *sbi, size_t bn); 219 | zu_dpp_t toyfs_page2dpp(struct toyfs_sb_info *sbi, struct toyfs_pmemb *); 220 | struct toyfs_pmemb *toyfs_dpp2pmemb(struct toyfs_sb_info *sbi, zu_dpp_t dpp); 221 | struct toyfs_inode *toyfs_acquire_inode(struct toyfs_sb_info *sbi); 222 | void toyfs_release_inode(struct toyfs_sb_info *sbi, struct toyfs_inode *inode); 223 | void toyfs_i_track(struct toyfs_inode_info *tii); 224 | void toyfs_i_untrack(struct toyfs_inode_info *tii, bool); 225 | struct toyfs_inode_ref * 226 | toyfs_find_inode_ref_by_ino(struct toyfs_sb_info *sbi, ino_t ino); 227 | struct toyfs_dirent *toyfs_acquire_dirent(struct toyfs_sb_info *sbi); 228 | struct toyfs_pmemb *toyfs_acquire_pmemb(struct toyfs_sb_info *sbi); 229 | int toyfs_statfs(struct zus_sb_info *zsbi, struct zufs_ioc_statfs *ioc_statfs); 230 | int toyfs_sync(struct zus_inode_info *zii, struct zufs_ioc_sync *); 231 | struct toyfs_inode_info *toyfs_alloc_ii(struct toyfs_sb_info *sbi); 232 | struct zus_inode_info *toyfs_zii_alloc(struct zus_sb_info *zsbi); 233 | void toyfs_tii_free(struct toyfs_inode_info *zii); 234 | void toyfs_release_pmemb(struct toyfs_sb_info *sbi, struct toyfs_pmemb *); 235 | struct toyfs_dblkref *toyfs_acquire_dblkref(struct toyfs_sb_info *sbi); 
236 | void toyfs_release_dblkref(struct toyfs_sb_info *sbi, 237 | struct toyfs_dblkref *dblkref); 238 | struct toyfs_iblkref *toyfs_acquire_iblkref(struct toyfs_sb_info *sbi); 239 | void toyfs_release_iblkref(struct toyfs_sb_info *sbi, 240 | struct toyfs_iblkref *iblkref); 241 | 242 | /* inode.c */ 243 | void toyfs_evict(struct zus_inode_info *zii); 244 | struct zus_inode_info * 245 | toyfs_new_inode(struct zus_sb_info *zsbi, 246 | void *app_ptr, struct zufs_ioc_new_inode *ioc_new); 247 | void toyfs_free_inode(struct toyfs_inode_info *zii); 248 | int toyfs_iget(struct zus_sb_info *zsbi, ulong ino, 249 | struct zus_inode_info **zii); 250 | int toyfs_setattr(struct zus_inode_info *zii, uint enable_bits); 251 | 252 | /* dir.c */ 253 | int toyfs_add_dirent(struct toyfs_inode_info *dir_tii, 254 | struct toyfs_inode_info *tii, struct zufs_str *str, 255 | struct toyfs_dirent **out_dirent); 256 | void toyfs_remove_dirent(struct toyfs_inode_info *dir_tii, 257 | struct toyfs_inode_info *tii, 258 | struct toyfs_dirent *dirent); 259 | int toyfs_add_dentry(struct zus_inode_info *dir_zii, 260 | struct zus_inode_info *zii, struct zufs_str *str); 261 | int toyfs_remove_dentry(struct zus_inode_info *dir_zii, 262 | struct zus_inode_info *zii, struct zufs_str *str); 263 | int toyfs_readdir(void *app_ptr, struct zufs_ioc_readdir *zir); 264 | int toyfs_iterate_dir(struct toyfs_inode_info *dir_tii, 265 | struct zufs_ioc_readdir *zir, void *buf); 266 | void toyfs_release_dir(struct toyfs_inode_info *dir_tii); 267 | 268 | struct toyfs_dirent * 269 | toyfs_lookup_dirent(struct toyfs_inode_info *dir_ii, const struct zufs_str *); 270 | struct toyfs_list_head *toyfs_childs_list_of(struct toyfs_inode_info *dir_tii); 271 | 272 | /* file.c */ 273 | int toyfs_read(void *buf, struct zufs_ioc_IO *ioc_io); 274 | int toyfs_pre_read(void *buf, struct zufs_ioc_IO *ioc_io); 275 | int toyfs_write(void *buf, struct zufs_ioc_IO *ioc_io); 276 | int toyfs_fallocate(struct zus_inode_info *zii, struct 
zufs_ioc_IO *); 277 | int toyfs_seek(struct zus_inode_info *zii, struct zufs_ioc_seek *zis); 278 | int toyfs_truncate(struct toyfs_inode_info *tii, size_t size); 279 | int toyfs_clone(struct zufs_ioc_clone *ioc_clone); 280 | int toyfs_fiemap(void *app_ptr, struct zufs_ioc_fiemap *zif); 281 | struct toyfs_list_head *toyfs_iblkrefs_list_of(struct toyfs_inode_info *tii); 282 | struct toyfs_pmemb *toyfs_resolve_pmemb(struct toyfs_inode_info *tii, 283 | loff_t off); 284 | uint64_t toyfs_require_pmem_bn(struct toyfs_inode_info *tii, loff_t off); 285 | 286 | /* symlink.c */ 287 | void toyfs_release_symlink(struct toyfs_inode_info *tii); 288 | int toyfs_get_symlink(struct zus_inode_info *zii, void **symlink); 289 | 290 | /* namei.c */ 291 | ulong toyfs_lookup(struct zus_inode_info *dir_ii, struct zufs_str *str); 292 | int toyfs_rename(struct zufs_ioc_rename *zir); 293 | 294 | /* xattr.c */ 295 | int toyfs_getxattr(struct zus_inode_info *zii, struct zufs_ioc_xattr *); 296 | int toyfs_setxattr(struct zus_inode_info *zii, struct zufs_ioc_xattr *); 297 | int toyfs_listxattr(struct zus_inode_info *zii, struct zufs_ioc_xattr *); 298 | void toyfs_drop_xattr(struct toyfs_inode_info *tii); 299 | 300 | /* mmap.c */ 301 | int toyfs_get_put_multy(struct zus_inode_info *zii, struct zufs_ioc_IO *io); 302 | int toyfs_mmap_close(struct zus_inode_info *zii, 303 | struct zufs_ioc_mmap_close *mmap_close); 304 | 305 | /* common.c */ 306 | void toyfs_panicf(const char *file, int line, const char *fmt, ...); 307 | void toyfs_mutex_init(pthread_mutex_t *mutex); 308 | void toyfs_mutex_destroy(pthread_mutex_t *mutex); 309 | void toyfs_mutex_lock(pthread_mutex_t *mutex); 310 | void toyfs_mutex_unlock(pthread_mutex_t *mutex); 311 | struct toyfs_sb_info *toyfs_zsbi_to_sbi(struct zus_sb_info *zsbi); 312 | struct toyfs_inode_info *toyfs_zii_to_tii(struct zus_inode_info *zii); 313 | extern const struct zus_sbi_operations toyfs_sbi_op; 314 | extern const struct zus_zii_operations toyfs_zii_op; 315 | 316 
| #endif /* TOYFS_H_*/ 317 | -------------------------------------------------------------------------------- /fs/toyfs/xattr.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * The toyfs reference file-system implementation via zufs 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "zus.h" 26 | #include "toyfs.h" 27 | 28 | static bool _has_xattr(const struct toyfs_inode_info *tii) 29 | { 30 | const struct toyfs_inode *ti = tii->ti; 31 | 32 | return (ti->i_xattr != 0); 33 | } 34 | 35 | static int _require_xattr(struct toyfs_inode_info *tii) 36 | { 37 | struct toyfs_pmemb *pmemb; 38 | struct toyfs_inode *ti = tii->ti; 39 | 40 | if (_has_xattr(tii)) 41 | return 0; 42 | 43 | pmemb = toyfs_acquire_pmemb(tii->sbi); 44 | if (unlikely(!pmemb)) 45 | return -ENOSPC; 46 | 47 | ti->i_xattr = toyfs_addr2bn(tii->sbi, pmemb); 48 | return 0; 49 | } 50 | 51 | static struct toyfs_xattr *_xattr_of(const struct toyfs_inode_info *tii) 52 | { 53 | const struct toyfs_inode *ti = tii->ti; 54 | 55 | return toyfs_bn2addr(tii->sbi, ti->i_xattr); 56 | } 57 | 58 | static bool _has_data(const struct toyfs_xattr_entry *xe) 59 | { 60 | return (xe->name_len > 0); 61 | } 62 | 63 | static bool _has_name(const struct toyfs_xattr_entry *xe, 64 | const char *name, size_t name_len) 65 | { 66 | return (xe->name_len == name_len) && 67 | !memcmp(xe->data, name, name_len); 68 | } 69 | 70 | static struct toyfs_xattr_entry *_next_of(struct toyfs_xattr_entry *xe) 71 | { 72 | void *next, *base = (void *)xe; 73 | 74 | next = (uint8_t *)base + xe->name_len + xe->value_size; 75 | return (struct toyfs_xattr_entry *)next; 76 | } 
77 | 78 | static ssize_t _copy_value_to_buf(const struct toyfs_xattr_entry *xe, 79 | void *buffer, size_t size) 80 | { 81 | const uint8_t *value = xe->data + xe->name_len; 82 | 83 | if (xe->value_size > size) 84 | return -ERANGE; 85 | 86 | memcpy(buffer, value, xe->value_size); 87 | return (ssize_t)xe->value_size; 88 | } 89 | 90 | static struct toyfs_xattr_entry * 91 | _find_xe(const struct toyfs_inode_info *tii, const char *name, size_t name_len) 92 | { 93 | struct toyfs_xattr *xattr = _xattr_of(tii); 94 | struct toyfs_xattr_entry *xe = xattr->xe; 95 | struct toyfs_xattr_entry *xe_end = 96 | xattr->xe + ARRAY_SIZE(xattr->xe); 97 | 98 | while ((xe < xe_end) && _has_data(xe)) { 99 | if (_has_name(xe, name, name_len)) 100 | return xe; 101 | xe = _next_of(xe); 102 | } 103 | return NULL; 104 | } 105 | 106 | static ssize_t _do_getxattr(const struct toyfs_inode_info *tii, 107 | const char *name, size_t name_len, 108 | void *buffer, size_t size) 109 | { 110 | ssize_t ret = -ENODATA; 111 | struct toyfs_xattr_entry *xe = _find_xe(tii, name, name_len); 112 | 113 | if (xe) 114 | ret = _copy_value_to_buf(xe, buffer, size); 115 | 116 | return ret; 117 | } 118 | 119 | int toyfs_getxattr(struct zus_inode_info *zii, 120 | struct zufs_ioc_xattr *ioc_xattr) 121 | { 122 | ssize_t size; 123 | const char *name = ioc_xattr->buf; 124 | struct toyfs_inode_info *tii = Z2II(zii); 125 | 126 | if (!_has_xattr(tii)) 127 | return -ENODATA; 128 | 129 | size = _do_getxattr(tii, name, strlen(name), ioc_xattr->buf, 130 | ioc_xattr->user_buf_size); 131 | if (unlikely(size < 0)) 132 | return size; 133 | 134 | if (ioc_xattr->user_buf_size) 135 | ioc_xattr->hdr.out_len += size; 136 | ioc_xattr->user_buf_size = size; 137 | return 0; 138 | } 139 | 140 | static size_t _nbytes_distance(struct toyfs_xattr_entry *beg, 141 | struct toyfs_xattr_entry *end) 142 | { 143 | return (size_t)((uint8_t *)end - (uint8_t *)beg); 144 | } 145 | 146 | static int _discard_xattr(const struct toyfs_inode_info *tii, 147 | 
struct toyfs_xattr_entry *xe) 148 | { 149 | size_t cnt; 150 | struct toyfs_xattr *xattr = _xattr_of(tii); 151 | struct toyfs_xattr_entry *xe_next = _next_of(xe); 152 | struct toyfs_xattr_entry *xe_end = 153 | xattr->xe + ARRAY_SIZE(xattr->xe); 154 | 155 | cnt = _nbytes_distance(xe_next, xe_end); 156 | memcpy(xe, xe_next, cnt); 157 | return 0; 158 | } 159 | 160 | static int _do_removexattr(const struct toyfs_inode_info *tii, 161 | const char *name, size_t name_len) 162 | { 163 | ssize_t ret = -ENODATA; 164 | struct toyfs_xattr_entry *xe = _find_xe(tii, name, name_len); 165 | 166 | if (xe) 167 | ret = _discard_xattr(tii, xe); 168 | return ret; 169 | } 170 | 171 | static int _append_xattr(const struct toyfs_inode_info *tii, 172 | const char *name, size_t name_len, 173 | const void *value, size_t size) 174 | { 175 | size_t cnt; 176 | struct toyfs_xattr *xattr = _xattr_of(tii); 177 | struct toyfs_xattr_entry *xe = xattr->xe; 178 | struct toyfs_xattr_entry *xe_end = 179 | xattr->xe + ARRAY_SIZE(xattr->xe); 180 | 181 | while ((xe < xe_end) && _has_data(xe)) 182 | xe = _next_of(xe); 183 | 184 | cnt = _nbytes_distance(xe, xe_end); 185 | if (cnt < (name_len + size)) 186 | return -ENOSPC; 187 | 188 | memcpy(xe->data, name, name_len); 189 | memcpy(xe->data + name_len, value, size); 190 | xe->name_len = name_len; 191 | xe->value_size = size; 192 | return 0; 193 | } 194 | 195 | static int _do_setxattr(const struct toyfs_inode_info *tii, 196 | const char *name, size_t name_len, 197 | const void *value, size_t size, unsigned int flags) 198 | { 199 | struct toyfs_xattr_entry *xe = _find_xe(tii, name, name_len); 200 | 201 | if ((flags & XATTR_CREATE) && xe) 202 | return -EEXIST; 203 | if ((flags & XATTR_REPLACE) && !xe) 204 | return -ENODATA; 205 | 206 | /* Naive impl */ 207 | _do_removexattr(tii, name, name_len); 208 | return _append_xattr(tii, name, name_len, value, size); 209 | } 210 | 211 | int toyfs_setxattr(struct zus_inode_info *zii, 212 | struct zufs_ioc_xattr *ioc_xattr) 
213 | { 214 | int err; 215 | const void *value = NULL; 216 | const char *name = ioc_xattr->buf; 217 | struct toyfs_inode_info *tii = Z2II(zii); 218 | 219 | err = _require_xattr(tii); 220 | if (unlikely(err)) 221 | return err; 222 | 223 | if (ioc_xattr->user_buf_size || 224 | (ioc_xattr->ioc_flags & ZUFS_XATTR_SET_EMPTY)) 225 | value = ioc_xattr->buf + ioc_xattr->name_len; 226 | 227 | if (!value) 228 | return _do_removexattr(tii, name, strlen(name)); 229 | 230 | return _do_setxattr(tii, name, strlen(name), 231 | value, ioc_xattr->user_buf_size, ioc_xattr->flags); 232 | } 233 | 234 | static void _copy_name_to_buf(const struct toyfs_xattr_entry *xe, 235 | char **buf, size_t *size) 236 | { 237 | memcpy(*buf, xe->data, xe->name_len); 238 | (*buf)[xe->name_len] = '\0'; 239 | *size -= (size_t)xe->name_len + 1; 240 | } 241 | 242 | static ssize_t _do_listxattr(const struct toyfs_inode_info *tii, 243 | char *buf, size_t size) 244 | { 245 | ssize_t ret = 0; 246 | struct toyfs_xattr *xattr = _xattr_of(tii); 247 | struct toyfs_xattr_entry *xe = xattr->xe; 248 | struct toyfs_xattr_entry *xe_end = 249 | xattr->xe + ARRAY_SIZE(xattr->xe); 250 | 251 | while ((xe < xe_end) && _has_data(xe)) { 252 | if (size) { 253 | if (size <= xe->name_len) 254 | return -ERANGE; 255 | _copy_name_to_buf(xe, &buf, &size); 256 | } 257 | ret += xe->name_len + 1; 258 | xe = _next_of(xe); 259 | } 260 | return ret; 261 | } 262 | 263 | int toyfs_listxattr(struct zus_inode_info *zii, 264 | struct zufs_ioc_xattr *ioc_xattr) 265 | { 266 | ssize_t size; 267 | struct toyfs_inode_info *tii = Z2II(zii); 268 | 269 | if (!_has_xattr(tii)) 270 | return -ENODATA; 271 | 272 | size = _do_listxattr(tii, ioc_xattr->buf, ioc_xattr->user_buf_size); 273 | 274 | if (unlikely(size < 0)) 275 | return size; 276 | 277 | if (ioc_xattr->user_buf_size) 278 | ioc_xattr->hdr.out_len += size; 279 | ioc_xattr->user_buf_size = size; 280 | return 0; 281 | } 282 | 283 | void toyfs_drop_xattr(struct toyfs_inode_info *tii) 284 | { 285 | 
struct toyfs_pmemb *pmemb; 286 | struct toyfs_inode *ti = tii->ti; 287 | 288 | if (_has_xattr(tii)) { 289 | pmemb = toyfs_bn2pmemb(tii->sbi, ti->i_xattr); 290 | toyfs_release_pmemb(tii->sbi, pmemb); 291 | ti->i_xattr = 0; 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /iom_enc.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * iom_enc.h - Encodes an iom_elemets array to send to Kernel 4 | * 5 | * Encoding is only done in user-mode. And decoding only in Kernel. 6 | * This is the encoding side. Common stuff come from zus_api.h 7 | * 8 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 9 | * 10 | * See module.c for LICENSE details. 11 | * 12 | * Authors: 13 | * Boaz Harrosh 14 | */ 15 | #ifndef __ZUS_IOM_H__ 16 | #define __ZUS_IOM_H__ 17 | 18 | #include 19 | 20 | #include "zus.h" 21 | 22 | 23 | typedef void (*iomd_done_fn)(struct zus_iomap_done *iomd, int err); 24 | struct zus_iomap_done { 25 | iomd_done_fn done; 26 | }; 27 | 28 | struct zus_iomap_build; 29 | typedef void (*iomb_submit_fn)(struct zus_iomap_build *iomb, bool sync); 30 | struct zus_iomap_build { 31 | iomb_submit_fn submit; 32 | struct zus_iomap_done *iomd; 33 | struct zus_sb_info *sbi; /* needed if for ioc_exec */ 34 | int fd; 35 | int err; 36 | 37 | void *cur_iom_e; 38 | void *end_iom_e; 39 | 40 | struct zufs_iomap *ziom; 41 | union { 42 | struct zufs_ioc_iomap_exec *ioc_exec; 43 | struct zufs_ioc_IO *ioc_io; 44 | }; 45 | }; 46 | 47 | void _zus_iom_ioc_exec_submit(struct zus_iomap_build *iomb, bool sync); 48 | 49 | static inline ulong _zus_iom_len(struct zus_iomap_build *iomb) 50 | { 51 | return (__u64 *)iomb->cur_iom_e - iomb->ziom->iom_e; 52 | } 53 | 54 | static inline bool _zus_iom_empty(struct zus_iomap_build *iomb) 55 | { 56 | return !_zus_iom_len(iomb); 57 | } 58 | 59 | static inline void _zus_iom_enc_type_val(__u64 *ptr, enum ZUFS_IOM_TYPE 
type, 60 | ulong val) 61 | { 62 | ZUS_WARN_ON(val & ~ZUFS_IOM_FIRST_VAL_MASK); 63 | _zufs_iom_enc_type_val(ptr, type, val); 64 | } 65 | 66 | /* iomb comes ZEROed! */ 67 | static inline void _zus_iom_common_init(struct zus_iomap_build *iomb, 68 | struct zus_sb_info *sbi, 69 | struct zufs_iomap *ziom, void *end_ptr) 70 | { 71 | memset(ziom, 0, sizeof(*ziom)); 72 | ziom->iom_max = (end_ptr - (void *)ziom->iom_e) / sizeof(__u64); 73 | 74 | iomb->sbi = sbi; 75 | iomb->ziom = ziom; 76 | iomb->end_iom_e = end_ptr; 77 | } 78 | 79 | static inline void _zus_iom_init_4_ioc_exec(struct zus_iomap_build *iomb, 80 | struct zus_sb_info *sbi, int fd, 81 | struct zufs_ioc_iomap_exec *ioc_exec, 82 | uint max_bytes) 83 | { 84 | _zus_iom_common_init(iomb, sbi, &ioc_exec->ziom, 85 | (void *)ioc_exec + max_bytes); 86 | iomb->fd = fd; 87 | iomb->submit = _zus_iom_ioc_exec_submit; 88 | iomb->ioc_exec = ioc_exec; 89 | } 90 | 91 | static inline void _zus_iom_init_4_ioc_io(struct zus_iomap_build *iomb, 92 | struct zus_sb_info *sbi, 93 | struct zufs_ioc_IO *ioc_io, 94 | uint max_bytes) 95 | { 96 | _zus_iom_common_init(iomb, sbi, &ioc_io->ziom, 97 | (void *)ioc_io + max_bytes); 98 | iomb->ioc_io = ioc_io; 99 | } 100 | 101 | static inline void _zus_iom_start(struct zus_iomap_build *iomb, 102 | struct zus_iomap_done *iomd) 103 | { 104 | iomb->cur_iom_e = iomb->ziom->iom_e; 105 | iomb->ziom->iom_n = 0; 106 | iomb->ziom->iom_e[0] = 0; 107 | iomb->iomd = iomd; 108 | } 109 | static inline void _zus_iom_end(struct zus_iomap_build *iomb) 110 | { 111 | /* NULL terminated list */ 112 | if (iomb->cur_iom_e < iomb->end_iom_e) 113 | _zus_iom_enc_type_val(iomb->cur_iom_e, 0, 0); 114 | 115 | if (iomb->ziom) 116 | iomb->ziom->iom_n = _zus_iom_len(iomb); 117 | } 118 | 119 | static inline int _zus_iom_enc_wbinv(struct zus_iomap_build *iomb) 120 | { 121 | __u64 *iom_e = iomb->cur_iom_e; 122 | 123 | if (unlikely(iomb->end_iom_e < (void*)(iom_e + 1))) 124 | return -ENOSPC; 125 | 126 | iomb->cur_iom_e = iom_e + 1; 
127 | _zus_iom_enc_type_val(iom_e, IOM_WBINV, 0); 128 | return 0; 129 | } 130 | 131 | static inline int _zus_iom_enc_discard(struct zus_iomap_build *iomb, 132 | __u64 t2_bn, __u64 num_pages) 133 | { 134 | struct zufs_iom_t2_io_len *iom_io_range = iomb->cur_iom_e; 135 | void *next_iom_e = iom_io_range + 1; 136 | 137 | if (unlikely(iomb->end_iom_e < next_iom_e)) 138 | return -ENOSPC; 139 | 140 | _zus_iom_enc_type_val((__u64*)iom_io_range, IOM_DISCARD, t2_bn); 141 | iom_io_range->num_pages = num_pages; 142 | 143 | iomb->cur_iom_e = next_iom_e; 144 | 145 | return 0; 146 | } 147 | 148 | static inline int _zus_iom_enc_unmap(struct zus_iomap_build *iomb, ulong index, 149 | ulong n, ulong ino) 150 | { 151 | struct zufs_iom_unmap *iom_unmap = iomb->cur_iom_e; 152 | void *next_iom_e = iom_unmap + 1; 153 | 154 | if (unlikely(iomb->end_iom_e < next_iom_e)) 155 | return -ENOSPC; 156 | 157 | _zus_iom_enc_type_val(&iom_unmap->unmap_index, IOM_UNMAP, index); 158 | iom_unmap->unmap_n = n; 159 | iom_unmap->ino = ino; 160 | iomb->cur_iom_e = next_iom_e; 161 | return 0; 162 | } 163 | 164 | static inline int _zus_iom_enc_t2_io(struct zus_iomap_build *iomb, ulong t2_bn, 165 | zu_dpp_t t1_val, enum ZUFS_IOM_TYPE type) 166 | { 167 | struct zufs_iom_t2_io *iom_io = iomb->cur_iom_e; 168 | void *next_iom_e = iom_io + 1; 169 | 170 | if (unlikely(iomb->end_iom_e < next_iom_e)) 171 | return -ENOSPC; 172 | 173 | _zus_iom_enc_type_val(&iom_io->t2_val, type, t2_bn); 174 | iom_io->t1_val = t1_val; 175 | 176 | iomb->cur_iom_e = next_iom_e; 177 | return 0; 178 | } 179 | 180 | static inline int _zus_iom_enc_t2_write(struct zus_iomap_build *iomb, ulong t2_bn, 181 | zu_dpp_t t1_val) 182 | { 183 | return _zus_iom_enc_t2_io(iomb, t2_bn, t1_val, IOM_T2_WRITE); 184 | } 185 | 186 | static inline int _zus_iom_enc_t2_read(struct zus_iomap_build *iomb, ulong t2_bn, 187 | zu_dpp_t t1_val) 188 | { 189 | return _zus_iom_enc_t2_io(iomb, t2_bn, t1_val, IOM_T2_READ); 190 | } 191 | 192 | static inline int 
_zus_iom_enc_t2_zusmem_io(struct zus_iomap_build *iomb, 193 | ulong t2_bn, void *ptr, ulong len, 194 | enum ZUFS_IOM_TYPE type) 195 | { 196 | struct zufs_iom_t2_zusmem_io *iom_io = iomb->cur_iom_e; 197 | void *next_iom_e = iom_io + 1; 198 | 199 | if (unlikely(iomb->end_iom_e < next_iom_e)) 200 | return -ENOSPC; 201 | 202 | _zus_iom_enc_type_val(&iom_io->t2_val, type, t2_bn); 203 | iom_io->zus_mem_ptr = (__u64)ptr; 204 | iom_io->len = len; 205 | 206 | iomb->cur_iom_e = next_iom_e; 207 | return 0; 208 | } 209 | 210 | static inline int _zus_iom_enc_t2_zusmem_write(struct zus_iomap_build *iomb, 211 | ulong t2_bn, void *ptr, 212 | ulong len) 213 | { 214 | return _zus_iom_enc_t2_zusmem_io(iomb, t2_bn, ptr, len, 215 | IOM_T2_ZUSMEM_WRITE); 216 | } 217 | 218 | static inline int _zus_iom_enc_t2_zusmem_read(struct zus_iomap_build *iomb, 219 | ulong t2_bn, void *ptr, 220 | ulong len) 221 | { 222 | return _zus_iom_enc_t2_zusmem_io(iomb, t2_bn, ptr, len, 223 | IOM_T2_ZUSMEM_READ); 224 | } 225 | 226 | static inline bool _ziom_enc_t1_bn(struct zus_iomap_build *iomb, ulong bn, 227 | uint pool) 228 | { 229 | __u64 *iom_e = iomb->cur_iom_e; 230 | 231 | iomb->cur_iom_e = iom_e + 1; 232 | _zufs_iom_enc_bn(iom_e, bn, pool); 233 | ++iomb->ziom->iom_n; 234 | 235 | /* Special no need to call _zus_iom_end, Returns true if more space */ 236 | return iomb->cur_iom_e < iomb->end_iom_e; 237 | } 238 | #endif /* define __ZUS_IOM_H__ */ 239 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * main.c - A CUI for the ZUS daemon 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 
8 | * 9 | * Authors: 10 | * Boaz Harrosh 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "zus.h" 22 | #include "zusd.h" 23 | 24 | static void usage(int argc, char *argv[]) 25 | { 26 | static char msg[] = { 27 | "usage: zus [options] [FILE_PATH]\n" 28 | " --policyRR=[PRIORITY]\n" 29 | " Set threads policy to SCHED_RR.\n" 30 | " Optional PRIORITY is between 1-99. Default is 20\n" 31 | " Only one of --policyRR --policyFIFO or --nice should be\n" 32 | " specified, last one catches\n" 33 | " --policyFIFO=[PRIORITY]\n" 34 | " Set threads policy to SCHED_FIFO.(The default)\n" 35 | " Optional PRIORITY is between 1-99. Default is 20\n" 36 | " Only one of --policyRR --policyFIFO or --nice should be\n" 37 | " specified, last one catches\n" 38 | " --policyFIFO=20 is the default\n" 39 | " --nice=[NICE_VAL]\n" 40 | " Set threads policy to SCHED_OTHER.\n" 41 | " And sets the nice value to NICE_VAL. Default NICE_VAL is 0\n" 42 | " Only one of --policyRR --policyFIFO or --nice should be\n" 43 | " specified, last one catches\n" 44 | " --mlock=[VAL]\n" 45 | " 0 - do not call mlockall.\n" 46 | " 1 - use MCL_CURRENT flag for mlockall.\n" 47 | " 2 - use (MCL_CURRENT | MCL_FUTURE) falgs for mlockall.\n" 48 | " other VAL is same as 0.\n" 49 | "\n" 50 | " FILE_PATH is the path to a mounted zuf-root directory\n" 51 | "\n" 52 | }; 53 | FILE *fp = stderr; 54 | int i; 55 | 56 | fprintf(fp, "%s", msg); 57 | fprintf(fp, "got: %s ", argv[0]); 58 | for (i = 1; i < argc; ++i) 59 | fprintf(fp, "%s ", argv[i]); 60 | fputs("\n", fp); 61 | } 62 | 63 | int main(int argc, char *argv[]) 64 | { 65 | struct option opt[] = { 66 | {.name = "policyRR", .has_arg = 2, .flag = NULL, .val = 'r'}, 67 | {.name = "policyFIFO", .has_arg = 2, .flag = NULL, .val = 'f'}, 68 | {.name = "nice", .has_arg = 2, .flag = NULL, .val = 'n'}, 69 | {.name = "verbose", .has_arg = 2, .flag = NULL, .val = 'd'}, 70 | {.name = "mlock", .has_arg = 2, 
.flag = NULL, .val = 'l'}, 71 | {.name = "mcheck", .has_arg = 0, .flag = NULL, .val = 'm'}, 72 | {.name = "pa_size", .has_arg = 2, .flag = NULL, .val = 'p'}, 73 | {.name = 0, .has_arg = 0, .flag = 0, .val = 0}, 74 | }; 75 | const char *shortopt = "r::f::n::d::l::p::m"; 76 | char op; 77 | struct zus_thread_params tp; 78 | const char *path = NULL; 79 | int err, flags = 0; 80 | ssize_t pa_size = 0; 81 | 82 | ZTP_INIT(&tp); 83 | while ((op = getopt_long(argc, argv, shortopt, opt, NULL)) != -1) { 84 | switch (op) { 85 | case 'r': 86 | tp.policy = SCHED_RR; 87 | if (optarg) 88 | tp.rr_priority = atoi(optarg); 89 | break; 90 | case 'f': 91 | tp.policy = SCHED_FIFO; 92 | if (optarg) 93 | tp.rr_priority = atoi(optarg); 94 | break; 95 | case 'n': 96 | tp.policy = SCHED_OTHER; 97 | if (optarg) 98 | tp.rr_priority = atoi(optarg); 99 | break; 100 | case 'd': 101 | if (optarg) 102 | g_DBGMASK = strtol(optarg, NULL, 0); 103 | else 104 | g_DBGMASK = 0x1; 105 | break; 106 | case 'l': 107 | if (optarg) 108 | g_mlock = atoi(optarg); 109 | break; 110 | case 'm': 111 | mallopt(M_CHECK_ACTION, 3); 112 | break; 113 | case 'p': 114 | if (optarg) 115 | pa_size = atol(optarg); 116 | break; 117 | default: 118 | /* Just ignore we are not the police */ 119 | break; 120 | } 121 | } 122 | 123 | argc -= optind; 124 | argv += optind; 125 | 126 | if ((argc < 0) || (argc > 1)) { 127 | usage(argc + optind, argv - optind); 128 | return 1; 129 | } else if (argc == 1) { 130 | path = argv[0]; 131 | } 132 | 133 | switch (g_mlock) { 134 | case MLOCK_ALL: { 135 | flags = MCL_CURRENT | MCL_FUTURE; 136 | break; 137 | } 138 | case MLOCK_CURRENT: { 139 | flags = MCL_CURRENT; 140 | break; 141 | } 142 | case MLOCK_NONE: 143 | default: { 144 | INFO("--mlock=0 is set, potential pagefault deadlock!\n"); 145 | break; 146 | } 147 | } 148 | 149 | if (flags) { 150 | err = mlockall(flags); 151 | if (unlikely(err)) 152 | return err; 153 | } 154 | 155 | zus_register_sigactions(); 156 | 157 | err = zus_increase_max_files(); 
158 | if (unlikely(err)) 159 | return err; 160 | 161 | err = zus_setup_pa_size(pa_size); 162 | if (unlikely(err)) 163 | return err; 164 | 165 | err = zus_slab_init(); 166 | if (unlikely(err)) 167 | return err; 168 | 169 | err = zus_mount_thread_start(&tp, path); 170 | if (unlikely(err)) 171 | goto stop; 172 | 173 | DBG("waiting for sigint ...\n"); 174 | zus_join(); 175 | 176 | stop: 177 | zus_mount_thread_stop(); 178 | return err; 179 | } 180 | -------------------------------------------------------------------------------- /md_zus.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * md.c - The user-mode imp of what we need from md.h 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Boaz Harrosh 11 | */ 12 | 13 | #include 14 | #include 15 | 16 | #include "zus.h" 17 | #include "movnt.h" 18 | #include "md.h" 19 | #include "iom_enc.h" 20 | #include "zuf_call.h" 21 | 22 | /* 23 | * python pycrc.py --model crc-16 --algorithm table-driven --generate c 24 | */ 25 | static const uint16_t crc_table[256] = { 26 | 0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241, 27 | 0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440, 28 | 0xcc01, 0x0cc0, 0x0d80, 0xcd41, 0x0f00, 0xcfc1, 0xce81, 0x0e40, 29 | 0x0a00, 0xcac1, 0xcb81, 0x0b40, 0xc901, 0x09c0, 0x0880, 0xc841, 30 | 0xd801, 0x18c0, 0x1980, 0xd941, 0x1b00, 0xdbc1, 0xda81, 0x1a40, 31 | 0x1e00, 0xdec1, 0xdf81, 0x1f40, 0xdd01, 0x1dc0, 0x1c80, 0xdc41, 32 | 0x1400, 0xd4c1, 0xd581, 0x1540, 0xd701, 0x17c0, 0x1680, 0xd641, 33 | 0xd201, 0x12c0, 0x1380, 0xd341, 0x1100, 0xd1c1, 0xd081, 0x1040, 34 | 0xf001, 0x30c0, 0x3180, 0xf141, 0x3300, 0xf3c1, 0xf281, 0x3240, 35 | 0x3600, 0xf6c1, 0xf781, 0x3740, 0xf501, 0x35c0, 0x3480, 0xf441, 36 | 0x3c00, 0xfcc1, 0xfd81, 0x3d40, 0xff01, 0x3fc0, 0x3e80, 0xfe41, 37 | 0xfa01, 0x3ac0, 0x3b80, 0xfb41, 0x3900, 
0xf9c1, 0xf881, 0x3840, 38 | 0x2800, 0xe8c1, 0xe981, 0x2940, 0xeb01, 0x2bc0, 0x2a80, 0xea41, 39 | 0xee01, 0x2ec0, 0x2f80, 0xef41, 0x2d00, 0xedc1, 0xec81, 0x2c40, 40 | 0xe401, 0x24c0, 0x2580, 0xe541, 0x2700, 0xe7c1, 0xe681, 0x2640, 41 | 0x2200, 0xe2c1, 0xe381, 0x2340, 0xe101, 0x21c0, 0x2080, 0xe041, 42 | 0xa001, 0x60c0, 0x6180, 0xa141, 0x6300, 0xa3c1, 0xa281, 0x6240, 43 | 0x6600, 0xa6c1, 0xa781, 0x6740, 0xa501, 0x65c0, 0x6480, 0xa441, 44 | 0x6c00, 0xacc1, 0xad81, 0x6d40, 0xaf01, 0x6fc0, 0x6e80, 0xae41, 45 | 0xaa01, 0x6ac0, 0x6b80, 0xab41, 0x6900, 0xa9c1, 0xa881, 0x6840, 46 | 0x7800, 0xb8c1, 0xb981, 0x7940, 0xbb01, 0x7bc0, 0x7a80, 0xba41, 47 | 0xbe01, 0x7ec0, 0x7f80, 0xbf41, 0x7d00, 0xbdc1, 0xbc81, 0x7c40, 48 | 0xb401, 0x74c0, 0x7580, 0xb541, 0x7700, 0xb7c1, 0xb681, 0x7640, 49 | 0x7200, 0xb2c1, 0xb381, 0x7340, 0xb101, 0x71c0, 0x7080, 0xb041, 50 | 0x5000, 0x90c1, 0x9181, 0x5140, 0x9301, 0x53c0, 0x5280, 0x9241, 51 | 0x9601, 0x56c0, 0x5780, 0x9741, 0x5500, 0x95c1, 0x9481, 0x5440, 52 | 0x9c01, 0x5cc0, 0x5d80, 0x9d41, 0x5f00, 0x9fc1, 0x9e81, 0x5e40, 53 | 0x5a00, 0x9ac1, 0x9b81, 0x5b40, 0x9901, 0x59c0, 0x5880, 0x9841, 54 | 0x8801, 0x48c0, 0x4980, 0x8941, 0x4b00, 0x8bc1, 0x8a81, 0x4a40, 55 | 0x4e00, 0x8ec1, 0x8f81, 0x4f40, 0x8d01, 0x4dc0, 0x4c80, 0x8c41, 56 | 0x4400, 0x84c1, 0x8581, 0x4540, 0x8701, 0x47c0, 0x4680, 0x8641, 57 | 0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040 58 | }; 59 | 60 | 61 | static uint16_t crc16(uint16_t crc, const void *data, size_t data_len) 62 | { 63 | const unsigned char *d = (const unsigned char *)data; 64 | unsigned int tbl_idx; 65 | 66 | while (data_len--) { 67 | tbl_idx = (crc ^ *d) & 0xff; 68 | crc = (crc_table[tbl_idx] ^ (crc >> 8)) & 0xffff; 69 | d++; 70 | } 71 | return crc & 0xffff; 72 | } 73 | 74 | static ulong _gcd(ulong _x, ulong _y) 75 | { 76 | ulong tmp; 77 | 78 | if (_x < _y) { 79 | ulong c = _x; 80 | 81 | _x = _y; 82 | _y = c; 83 | } 84 | 85 | if (!_y) 86 | return _x; 87 | 88 | while ((tmp = _x % _y) != 0) { 89 | _x = 
_y; 90 | _y = tmp; 91 | } 92 | return _y; 93 | } 94 | 95 | short md_calc_csum(struct md_dev_table *mdt) 96 | { 97 | uint n = MDT_STATIC_SIZE(mdt) - sizeof(mdt->s_sum); 98 | /* FIXME: We should skip s_version so we can change it after 99 | * mount, once we start using the new structures 100 | * So below should be &mdt->s_version => &mdt->s_magic 101 | * PXS-240. 102 | */ 103 | return crc16(~0, (__u8 *)&mdt->s_version, n); 104 | return 0; 105 | } 106 | 107 | static void _init_dev_info(struct md_dev_info *mdi, struct md_dev_id *id, 108 | int index, __u64 offset, void *pmem_addr) 109 | { 110 | mdi->offset = offset; 111 | mdi->index = index; 112 | mdi->size = md_p2o(__dev_id_blocks(id)); 113 | mdi->nid = __dev_id_nid(id); 114 | 115 | if (pmem_addr) { /* We are t1*/ 116 | mdi->t1i.virt_addr = pmem_addr + offset; 117 | } 118 | 119 | DBG("[%d] mdi(offset=0x%lx, size=0x%lx, nid=%d) @%p\n", 120 | mdi->index, mdi->offset, mdi->size, mdi->nid, 121 | pmem_addr ? pmem_addr + offset : 0); 122 | } 123 | 124 | static int _map_setup(struct multi_devices *md, ulong blocks, int dev_start, 125 | struct md_dev_larray *larray) 126 | { 127 | ulong map_size, bn_end; 128 | uint i, dev_index = dev_start; 129 | 130 | map_size = blocks / larray->bn_gcd; 131 | larray->map = calloc(map_size, sizeof(*larray->map)); 132 | if (!larray->map) { 133 | md_dbg_err("failed to allocate dev map\n"); 134 | return -ENOMEM; 135 | } 136 | 137 | bn_end = md_o2p(md->devs[dev_index].size); 138 | for (i = 0; i < map_size; ++i) { 139 | if ((i * larray->bn_gcd) >= bn_end) 140 | bn_end += md_o2p(md->devs[++dev_index].size); 141 | larray->map[i] = &md->devs[dev_index]; 142 | } 143 | 144 | return 0; 145 | } 146 | 147 | int md_init_from_pmem_info(struct multi_devices *md) 148 | { 149 | struct md_dev_list *dev_list = &md->pmem_info.mdt.s_dev_list; 150 | ulong offset = 0; 151 | int i, err; 152 | 153 | md->t1_count = dev_list->t1_count; 154 | md->t2_count = dev_list->t2_count; 155 | 156 | for (i = 0; i < md->t1_count; ++i) 
{ 157 | struct md_dev_info *mdi = &md->devs[i]; 158 | 159 | _init_dev_info(mdi, &dev_list->dev_ids[i], i, offset, 160 | md->p_pmem_addr); 161 | offset += mdi->size; 162 | md->t1a.bn_gcd = _gcd(md->t1a.bn_gcd, md_o2p(mdi->size)); 163 | } 164 | 165 | offset = 0; 166 | for (; i < md->t1_count + md->t2_count; ++i) { 167 | struct md_dev_info *mdi = &md->devs[i]; 168 | 169 | _init_dev_info(mdi, &dev_list->dev_ids[i], i, offset, NULL); 170 | offset += mdi->size; 171 | md->t2a.bn_gcd = _gcd(md->t2a.bn_gcd, md_o2p(mdi->size)); 172 | } 173 | 174 | if (md->t1_count) { 175 | err = _map_setup(md, md_t1_blocks(md), 0, &md->t1a); 176 | if (unlikely(err)) 177 | return err; 178 | } 179 | 180 | if (md->t2_count) { 181 | err = _map_setup(md, md_t2_blocks(md), md->t1_count, &md->t2a); 182 | if (unlikely(err)) 183 | return err; 184 | } 185 | return 0; 186 | } 187 | 188 | void md_fini(struct multi_devices *md, bool put_all) 189 | { 190 | if (md->t2_count) 191 | free(md->t2a.map); 192 | if (md->t1_count) 193 | free(md->t1a.map); 194 | } 195 | 196 | static bool _csum_mismatch(struct md_dev_table *mdt, int silent) 197 | { 198 | ushort crc = md_calc_csum(mdt); 199 | 200 | if (mdt->s_sum == cpu_to_le16(crc)) 201 | return false; 202 | 203 | md_warn_cnd(silent, "expected(0x%x) != s_sum(0x%x)\n", 204 | cpu_to_le16(crc), mdt->s_sum); 205 | return true; 206 | } 207 | 208 | static bool _uuid_le_equal(uuid_le *uuid1, uuid_le *uuid2) 209 | { 210 | return (memcmp(uuid1, uuid2, sizeof(uuid_le)) == 0); 211 | } 212 | 213 | static bool _mdt_compare_uuids(struct md_dev_table *mdt, 214 | struct md_dev_table *main_mdt, int silent) 215 | { 216 | int i, dev_count; 217 | 218 | if (!_uuid_le_equal(&mdt->s_uuid, &main_mdt->s_uuid)) { 219 | md_warn_cnd(silent, "mdt uuid (%pUb != %pUb) mismatch\n", 220 | &mdt->s_uuid, &main_mdt->s_uuid); 221 | return false; 222 | } 223 | 224 | dev_count = mdt->s_dev_list.t1_count + mdt->s_dev_list.t2_count + 225 | mdt->s_dev_list.rmem_count; 226 | for (i = 0; i < dev_count; ++i) { 
227 | struct md_dev_id *dev_id1 = &mdt->s_dev_list.dev_ids[i]; 228 | struct md_dev_id *dev_id2 = &main_mdt->s_dev_list.dev_ids[i]; 229 | 230 | if (!_uuid_le_equal(&dev_id1->uuid, &dev_id2->uuid)) { 231 | md_warn_cnd(silent, "mdt dev %d uuid (%pUb != %pUb) mismatch\n", 232 | i, &dev_id1->uuid, &dev_id2->uuid); 233 | return false; 234 | } 235 | 236 | if (dev_id1->blocks != dev_id2->blocks) { 237 | md_warn_cnd(silent, "mdt dev %d blocks (0x%llx != 0x%llx) mismatch\n", 238 | i, le64_to_cpu(dev_id1->blocks), 239 | le64_to_cpu(dev_id2->blocks)); 240 | return false; 241 | } 242 | } 243 | 244 | return true; 245 | } 246 | 247 | bool md_mdt_check(struct md_dev_table *mdt, 248 | struct md_dev_table *main_mdt, struct block_device *bdev, 249 | struct mdt_check *mc) 250 | { 251 | struct md_dev_id *dev_id; 252 | ulong super_size; 253 | int id_index; 254 | 255 | // BUILD_BUG_ON(MDT_STATIC_SIZE(mdt) & (SMP_CACHE_BYTES - 1)); 256 | 257 | /* Do sanity checks on the superblock */ 258 | if (le32_to_cpu(mdt->s_magic) != mc->magic) { 259 | md_warn_cnd(mc->silent, "Magic error in super block: please run fsck\n"); 260 | return false; 261 | } 262 | 263 | if ((mc->major_ver != (uint)mdt_major_version(mdt)) || 264 | (mc->minor_ver < (uint)mdt_minor_version(mdt))) { 265 | md_warn_cnd(mc->silent, 266 | "mkfs-mount versions mismatch! 
%d.%d != %d.%d\n", 267 | mdt_major_version(mdt), mdt_minor_version(mdt), 268 | mc->major_ver, mc->minor_ver); 269 | return false; 270 | } 271 | 272 | if (_csum_mismatch(mdt, mc->silent)) { 273 | md_warn_cnd(mc->silent, "crc16 error in super block: please run fsck\n"); 274 | return false; 275 | } 276 | 277 | if (main_mdt) { 278 | if (mdt->s_dev_list.t1_count != main_mdt->s_dev_list.t1_count) { 279 | md_warn_cnd(mc->silent, "mdt t1 count mismatch\n"); 280 | return false; 281 | } 282 | 283 | if (mdt->s_dev_list.t2_count != main_mdt->s_dev_list.t2_count) { 284 | md_warn_cnd(mc->silent, "mdt t2 count mismatch\n"); 285 | return false; 286 | } 287 | 288 | if (mdt->s_dev_list.rmem_count != main_mdt->s_dev_list.rmem_count) { 289 | md_warn_cnd(mc->silent, "mdt rmem dev count mismatch\n"); 290 | return false; 291 | } 292 | 293 | if (!_mdt_compare_uuids(mdt, main_mdt, mc->silent)) 294 | return false; 295 | } 296 | 297 | id_index = mdt->s_dev_list.id_index; 298 | 299 | /* check id_index */ 300 | if (id_index < 0 || 301 | mdt->s_dev_list.t1_count + mdt->s_dev_list.t2_count < id_index) { 302 | md_warn_cnd(mc->silent, "invalid device index %d\n", id_index); 303 | return false; 304 | } 305 | 306 | /* check alignment */ 307 | dev_id = &mdt->s_dev_list.dev_ids[id_index]; 308 | super_size = md_p2o(__dev_id_blocks(dev_id)); 309 | if (unlikely(!super_size || super_size & mc->alloc_mask)) { 310 | md_warn_cnd(mc->silent, "super_size(0x%lx) ! 
2_M aligned\n", 311 | super_size); 312 | return false; 313 | } 314 | 315 | return true; 316 | } 317 | 318 | static void _done(struct zus_iomap_done *iomd, int err) 319 | { 320 | if (unlikely(err)) 321 | ERROR("T2 I/O failed => %d\n", err); 322 | } 323 | static struct zus_iomap_done _iomd = { 324 | .done = _done, 325 | }; 326 | 327 | static struct fba g_io_fba; 328 | 329 | static int _init_g_fba(void) 330 | { 331 | if (g_io_fba.fd) 332 | return 0; 333 | 334 | return zus_alloc_exec_buff(NULL, PAGE_SIZE, 0, &g_io_fba); 335 | } 336 | 337 | static int _iomb_start(struct zus_iomap_build *iomb, struct multi_devices *md) 338 | { 339 | iomb->err = _init_g_fba(); 340 | if (iomb->err) 341 | return iomb->err; 342 | 343 | _zus_iom_init_4_ioc_exec(iomb, md->sbi, g_io_fba.fd, g_io_fba.ptr, 344 | PAGE_SIZE); 345 | _zus_iom_start(iomb, &_iomd); 346 | return 0; 347 | } 348 | 349 | int md_t2_mdt_read(struct multi_devices *md, int dev_index, 350 | struct md_dev_table *mdt) 351 | { 352 | struct zus_iomap_build iomb = {}; 353 | 354 | iomb.err = _iomb_start(&iomb, md); 355 | if (unlikely(iomb.err)) 356 | return iomb.err; 357 | 358 | _zus_iom_enc_t2_zusmem_read(&iomb, 0, mdt, PAGE_SIZE); 359 | _zus_iom_ioc_exec_submit(&iomb, true); 360 | 361 | return iomb.err; 362 | } 363 | 364 | int md_t2_mdt_write(struct multi_devices *md, struct md_dev_table *mdt) 365 | { 366 | struct zus_iomap_build iomb = {}; 367 | int i; 368 | 369 | /* FIXME: must make copies and execute at end. 
one by one for now */ 370 | for (i = 0; i < md->t2_count; ++i) { 371 | ulong bn = md_o2p(md_t2_dev(md, i)->offset); 372 | 373 | iomb.err = _iomb_start(&iomb, md); 374 | if (unlikely(iomb.err)) 375 | return iomb.err; 376 | 377 | mdt->s_dev_list.id_index = mdt->s_dev_list.t1_count + i; 378 | mdt->s_sum = cpu_to_le16(md_calc_csum(mdt)); 379 | 380 | _zus_iom_enc_t2_zusmem_write(&iomb, bn, mdt, PAGE_SIZE); 381 | _zus_iom_ioc_exec_submit(&iomb, true); 382 | if (iomb.err) 383 | break; 384 | } 385 | 386 | return iomb.err; 387 | } 388 | 389 | /* ~~~ _zus_iom facility (imp of iom_enc.h) ~~~ */ 390 | void _zus_iom_ioc_exec_submit(struct zus_iomap_build *iomb, bool sync) 391 | { 392 | struct zufs_ioc_iomap_exec *ziome = iomb->ioc_exec; 393 | int err; 394 | 395 | _zus_iom_end(iomb); 396 | 397 | if (ZUS_WARN_ON(!iomb->ziom)) 398 | return; 399 | 400 | ziome->zus_sbi = iomb->sbi; 401 | ziome->sb_id = iomb->sbi->kern_sb_id; 402 | ziome->ziom.iomd = iomb->iomd; 403 | ziome->wait_for_done = sync; 404 | DBG("{%p} sb_id=%lld, iom_n=0x%x wait=%d\n", 405 | iomb->iomd, ziome->sb_id, ziome->ziom.iom_n, sync); 406 | 407 | err = zuf_iomap_exec(iomb->fd, ziome); 408 | 409 | iomb->err = iomb->ioc_exec->hdr.err; 410 | if (unlikely(err && !iomb->err)) 411 | iomb->err = -errno; 412 | 413 | if (sync && iomb->iomd) 414 | iomb->iomd->done(iomb->iomd, iomb->err); 415 | } 416 | -------------------------------------------------------------------------------- /module.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * BSD-3-Clause License 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 
13 | * 14 | * 2. Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in the 16 | * documentation and/or other materials provided with the distribution. 17 | * 18 | * 3. Neither the name of the copyright holder nor the names of its 19 | * contributors may be used to endorse or promote products derived from 20 | * this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | */ 34 | #include "zus.h" 35 | 36 | const ushort VERSION[2] = { 37 | ZUFS_MAJOR_VERSION, ZUFS_MINOR_VERSION 38 | }; 39 | const char AUTHOR1[] = "Boaz Harrosh "; 40 | const char AUTHOR2[] = "Shachar Sharon "; 41 | const char LICENSE[] = "BSD-3-Clause"; 42 | -------------------------------------------------------------------------------- /movnt.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * BRIEF DESCRIPTION 4 | * 5 | * Some General x86_64 operations. 6 | * NT means Non-Temporal (Intel's terminology) 7 | * 8 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 
9 | * 10 | * See module.c for LICENSE details. 11 | */ 12 | 13 | #ifndef __ZUS_MOVENT_H 14 | #define __ZUS_MOVENT_H 15 | 16 | #define CACHELINE_SHIFT (6) 17 | #define CACHELINE_SIZE (1UL << CACHELINE_SHIFT) 18 | 19 | #include 20 | 21 | #include "zus.h" 22 | 23 | static inline void a_clflushopt(void *p) 24 | { 25 | asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)p)); 26 | } 27 | 28 | static inline void a_clwb(void *p) 29 | { 30 | asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)p)); 31 | } 32 | 33 | static inline void cl_flush(void *buf, uint32_t len) 34 | { 35 | uint32_t i; 36 | 37 | len = len + ((unsigned long)(buf) & (CACHELINE_SIZE - 1)); 38 | for (i = 0; i < len; i += CACHELINE_SIZE) 39 | _mm_clflush(buf + i); 40 | } 41 | 42 | /* 43 | * clwb writes back cachelines concurrently and require a store 44 | * barrier (sfence) to verify completeness. 45 | * 46 | * WARNING: don't use directly, will crash old unsupported CPUs! 47 | */ 48 | static inline void __cl_flush_wb(void *buf, uint32_t len) 49 | { 50 | uint32_t i; 51 | 52 | len = len + ((unsigned long)(buf) & (CACHELINE_SIZE - 1)); 53 | for (i = 0; i < len; i += CACHELINE_SIZE) 54 | a_clwb(buf + i); 55 | 56 | _mm_sfence(); 57 | } 58 | 59 | /* 60 | * clflushopt flushes cachelines concurrently and require a store 61 | * barrier (sfence) to verify completeness. 62 | * 63 | * WARNING: don't use directly, will crash old unsupported CPUs! 
64 | */ 65 | static inline void __cl_flush_opt(void *buf, uint32_t len) 66 | { 67 | uint32_t i; 68 | 69 | len = len + ((unsigned long)(buf) & (CACHELINE_SIZE - 1)); 70 | for (i = 0; i < len; i += CACHELINE_SIZE) 71 | a_clflushopt(buf + i); 72 | 73 | _mm_sfence(); 74 | } 75 | 76 | extern void (*cl_flush_opt)(void *buf, uint32_t len); 77 | extern void (*cl_flush_wb)(void *buf, uint32_t len); 78 | 79 | /* TODO use AVX-512 instructions if available PXS-245 */ 80 | static inline void _memzero_nt_cachelines(void *dst, size_t cachelines) 81 | { 82 | /* must use dummy outputs so not to clobber inputs */ 83 | ulong dummy1, dummy2; 84 | 85 | asm volatile ( 86 | "xor %%rax,%%rax\n" 87 | "1: movnti %%rax,(%0)\n" 88 | "movnti %%rax,1*8(%0)\n" 89 | "movnti %%rax,2*8(%0)\n" 90 | "movnti %%rax,3*8(%0)\n" 91 | "movnti %%rax,4*8(%0)\n" 92 | "movnti %%rax,5*8(%0)\n" 93 | "movnti %%rax,6*8(%0)\n" 94 | "movnti %%rax,7*8(%0)\n" 95 | "leaq 64(%0),%0\n" 96 | "dec %1\n" 97 | "jnz 1b\n" 98 | : "=D" (dummy1), "=d" (dummy2) : 99 | "D" (dst), "d" (cachelines) : "memory", "rax"); 100 | } 101 | 102 | static inline void memzero_nt(void *dst, size_t len) 103 | { 104 | size_t cachelines, prefix_len; 105 | 106 | /* if dst is not cacheline aligned, fill with memset */ 107 | if (unlikely((ulong)dst & (CACHELINE_SIZE-1))) { 108 | prefix_len = CACHELINE_SIZE - ((ulong)dst & (CACHELINE_SIZE-1)); 109 | if (prefix_len > len) 110 | prefix_len = len; 111 | memset(dst, 0, prefix_len); 112 | cl_flush(dst, prefix_len); 113 | len -= prefix_len; 114 | dst += prefix_len; 115 | } 116 | 117 | cachelines = len >> CACHELINE_SHIFT; 118 | if (likely(cachelines)) 119 | _memzero_nt_cachelines(dst, cachelines); 120 | 121 | /* fill remaining bytes with memset */ 122 | len -= cachelines << CACHELINE_SHIFT; 123 | dst += cachelines << CACHELINE_SHIFT; 124 | if (unlikely(len > 0)) { 125 | memset(dst, 0, len); 126 | cl_flush(dst, len); 127 | } 128 | } 129 | 130 | /* zus: nvml_movnt.c */ 131 | void *pmem_memmove_persist(void 
*pmemdest, const void *src, size_t len); 132 | 133 | #define memcpy_to_pmem pmem_memmove_persist 134 | 135 | #endif /* ifndef __ZUS_MOVENT_H */ 136 | -------------------------------------------------------------------------------- /nvml_movnt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2014-2016, Intel Corporation 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * * Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions and the following disclaimer in 13 | * the documentation and/or other materials provided with the 14 | * distribution. 15 | * 16 | * * Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | * 32 | * Some changes and fixes made by: 33 | * Boaz Harrosh 34 | */ 35 | 36 | #include 37 | 38 | #include "movnt.h" 39 | 40 | #define EAX_IDX 0 41 | #define EBX_IDX 1 42 | #define ECX_IDX 2 43 | #define EDX_IDX 3 44 | 45 | #define CLFLUSHOPT_FUNC 0x7 46 | #define CLFLUSHOPT_BIT (1 << 23) 47 | 48 | #define CLWB_FUNC 0x7 49 | #define CLWB_BIT (1 << 24) 50 | 51 | #define CACHELINE_ALIGN ((uintptr_t)64) 52 | #define CACHELINE_MASK (CACHELINE_ALIGN - 1) 53 | 54 | #define CHUNK_SIZE 128 /* 16*8 */ 55 | #define CHUNK_SHIFT 7 56 | #define CHUNK_MASK (CHUNK_SIZE - 1) 57 | 58 | #define DWORD_SIZE 4 59 | #define DWORD_SHIFT 2 60 | #define DWORD_MASK (DWORD_SIZE - 1) 61 | 62 | #define MOVNT_SIZE 16 63 | #define MOVNT_MASK (MOVNT_SIZE - 1) 64 | #define MOVNT_SHIFT 4 65 | 66 | #define MOVNT_THRESHOLD 256 67 | 68 | /* 69 | * flush_clflush -- (internal) flush the CPU cache, using clflush 70 | * (Boaz: Is only used here for the none aligned tails of movnt, clflush 71 | * Is always better than clflushopt in this case, even if clflushopt is 72 | * available) 73 | */ 74 | static void 75 | flush_clflush(const void *addr, size_t len) 76 | { 77 | uintptr_t uptr; 78 | 79 | /* 80 | * Loop through cache-line-size (typically 64B) aligned chunks 81 | * covering the given range. 
82 | */ 83 | for (uptr = (uintptr_t)addr & ~(CACHELINE_ALIGN - 1); 84 | uptr < (uintptr_t)addr + len; uptr += CACHELINE_ALIGN) 85 | _mm_clflush((char *)uptr); 86 | } 87 | 88 | static void 89 | pmem_flush(const void *addr, size_t len) 90 | { 91 | flush_clflush(addr, len); 92 | } 93 | 94 | /* 95 | * memmove_nodrain_movnt -- (internal) memmove to pmem without hw drain, movnt 96 | */ 97 | static void * 98 | memmove_nodrain_movnt(void *pmemdest, const void *src, size_t len) 99 | { 100 | __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; 101 | size_t i; 102 | __m128i *d; 103 | const __m128i *s; 104 | void *dest1 = pmemdest; 105 | size_t cnt; 106 | 107 | if (len == 0 || src == pmemdest) 108 | return pmemdest; 109 | 110 | if (len < MOVNT_THRESHOLD) { 111 | memmove(pmemdest, src, len); 112 | pmem_flush(pmemdest, len); 113 | return pmemdest; 114 | } 115 | 116 | if ((uintptr_t)dest1 - (uintptr_t)src >= len) { 117 | /* 118 | * Copy the range in the forward direction. 119 | * 120 | * This is the most common, most optimized case, used unless 121 | * the overlap specifically prevents it. 
122 | */ 123 | 124 | /* copy up to CACHELINE_ALIGN boundary */ 125 | cnt = (uint64_t)dest1 & CACHELINE_MASK; 126 | if (cnt > 0) { 127 | uint8_t *d8; 128 | const uint8_t *s8; 129 | 130 | cnt = CACHELINE_ALIGN - cnt; 131 | 132 | /* never try to copy more the len bytes */ 133 | if (cnt > len) 134 | cnt = len; 135 | 136 | d8 = dest1; 137 | s8 = src; 138 | for (i = 0; i < cnt; i++) { 139 | *d8 = *s8; 140 | d8++; 141 | s8++; 142 | } 143 | pmem_flush(dest1, cnt); 144 | dest1 += cnt; 145 | src += cnt; 146 | len -= cnt; 147 | } 148 | 149 | d = dest1; 150 | s = src; 151 | 152 | cnt = len >> CHUNK_SHIFT; 153 | for (i = 0; i < cnt; i++) { 154 | xmm0 = _mm_loadu_si128(s); 155 | xmm1 = _mm_loadu_si128(s + 1); 156 | xmm2 = _mm_loadu_si128(s + 2); 157 | xmm3 = _mm_loadu_si128(s + 3); 158 | xmm4 = _mm_loadu_si128(s + 4); 159 | xmm5 = _mm_loadu_si128(s + 5); 160 | xmm6 = _mm_loadu_si128(s + 6); 161 | xmm7 = _mm_loadu_si128(s + 7); 162 | s += 8; 163 | _mm_stream_si128(d, xmm0); 164 | _mm_stream_si128(d + 1, xmm1); 165 | _mm_stream_si128(d + 2, xmm2); 166 | _mm_stream_si128(d + 3, xmm3); 167 | _mm_stream_si128(d + 4, xmm4); 168 | _mm_stream_si128(d + 5, xmm5); 169 | _mm_stream_si128(d + 6, xmm6); 170 | _mm_stream_si128(d + 7, xmm7); 171 | d += 8; 172 | } 173 | 174 | /* copy the tail (<128 bytes) in 16 bytes chunks */ 175 | len &= CHUNK_MASK; 176 | if (len != 0) { 177 | cnt = len >> MOVNT_SHIFT; 178 | for (i = 0; i < cnt; i++) { 179 | xmm0 = _mm_loadu_si128(s); 180 | _mm_stream_si128(d, xmm0); 181 | s++; 182 | d++; 183 | } 184 | } 185 | 186 | /* copy the last bytes (<16), first dwords then bytes */ 187 | len &= MOVNT_MASK; 188 | if (len != 0) { 189 | int32_t *d32 = (int32_t *)d; 190 | const int32_t *s32 = (const int32_t *)s; 191 | uint8_t *d8; 192 | const uint8_t *s8; 193 | 194 | cnt = len >> DWORD_SHIFT; 195 | for (i = 0; i < cnt; i++) { 196 | _mm_stream_si32(d32, *s32); 197 | d32++; 198 | s32++; 199 | } 200 | cnt = len & DWORD_MASK; 201 | d8 = (uint8_t *)d32; 202 | s8 = (const 
uint8_t *)s32; 203 | 204 | for (i = 0; i < cnt; i++) { 205 | *d8 = *s8; 206 | d8++; 207 | s8++; 208 | } 209 | pmem_flush(d32, cnt); 210 | } 211 | } else { 212 | /* 213 | * Copy the range in the backward direction. 214 | * 215 | * This prevents overwriting source data due to an 216 | * overlapped destination range. 217 | */ 218 | 219 | dest1 += len; 220 | src += len; 221 | 222 | cnt = (uint64_t)dest1 & CACHELINE_MASK; 223 | if (cnt > 0) { 224 | uint8_t *d8; 225 | const uint8_t *s8; 226 | 227 | /* never try to copy more the len bytes */ 228 | if (cnt > len) 229 | cnt = len; 230 | 231 | d8 = dest1; 232 | s8 = src; 233 | for (i = 0; i < cnt; i++) { 234 | d8--; 235 | s8--; 236 | *d8 = *s8; 237 | } 238 | pmem_flush(d8, cnt); 239 | dest1 = (char *)dest1 - cnt; 240 | src = (const char *)src - cnt; 241 | len -= cnt; 242 | } 243 | 244 | d = (__m128i *)dest1; 245 | s = (const __m128i *)src; 246 | 247 | cnt = len >> CHUNK_SHIFT; 248 | for (i = 0; i < cnt; i++) { 249 | xmm0 = _mm_loadu_si128(s - 1); 250 | xmm1 = _mm_loadu_si128(s - 2); 251 | xmm2 = _mm_loadu_si128(s - 3); 252 | xmm3 = _mm_loadu_si128(s - 4); 253 | xmm4 = _mm_loadu_si128(s - 5); 254 | xmm5 = _mm_loadu_si128(s - 6); 255 | xmm6 = _mm_loadu_si128(s - 7); 256 | xmm7 = _mm_loadu_si128(s - 8); 257 | s -= 8; 258 | _mm_stream_si128(d - 1, xmm0); 259 | _mm_stream_si128(d - 2, xmm1); 260 | _mm_stream_si128(d - 3, xmm2); 261 | _mm_stream_si128(d - 4, xmm3); 262 | _mm_stream_si128(d - 5, xmm4); 263 | _mm_stream_si128(d - 6, xmm5); 264 | _mm_stream_si128(d - 7, xmm6); 265 | _mm_stream_si128(d - 8, xmm7); 266 | d -= 8; 267 | } 268 | 269 | /* copy the tail (<128 bytes) in 16 bytes chunks */ 270 | len &= CHUNK_MASK; 271 | if (len != 0) { 272 | cnt = len >> MOVNT_SHIFT; 273 | for (i = 0; i < cnt; i++) { 274 | d--; 275 | s--; 276 | xmm0 = _mm_loadu_si128(s); 277 | _mm_stream_si128(d, xmm0); 278 | } 279 | } 280 | 281 | /* copy the last bytes (<16), first dwords then bytes */ 282 | len &= MOVNT_MASK; 283 | if (len != 0) { 284 | 
int32_t *d32 = (int32_t *)d; 285 | const int32_t *s32 = (const int32_t *)s; 286 | uint8_t *d8; 287 | const uint8_t *s8; 288 | 289 | cnt = len >> DWORD_SHIFT; 290 | 291 | for (i = 0; i < cnt; i++) { 292 | d32--; 293 | s32--; 294 | _mm_stream_si32(d32, *s32); 295 | } 296 | 297 | cnt = len & DWORD_MASK; 298 | d8 = (uint8_t *)d32; 299 | s8 = (const uint8_t *)s32; 300 | 301 | for (i = 0; i < cnt; i++) { 302 | d8--; 303 | s8--; 304 | *d8 = *s8; 305 | } 306 | pmem_flush(d8, cnt); 307 | } 308 | } 309 | 310 | /* serialize non-temporal store instructions */ 311 | _mm_sfence(); 312 | 313 | return pmemdest; 314 | } 315 | 316 | /* 317 | * pmem_memmove_persist -- memmove to pmem 318 | */ 319 | void * 320 | pmem_memmove_persist(void *pmemdest, const void *src, size_t len) 321 | { 322 | memmove_nodrain_movnt(pmemdest, src, len); 323 | 324 | return pmemdest; 325 | } 326 | 327 | static inline void 328 | cpuid(unsigned func, unsigned subfunc, unsigned cpuinfo[4]) 329 | { 330 | __cpuid_count(func, subfunc, cpuinfo[EAX_IDX], cpuinfo[EBX_IDX], 331 | cpuinfo[ECX_IDX], cpuinfo[EDX_IDX]); 332 | } 333 | 334 | static int cpuid_check(unsigned func, unsigned reg, unsigned bit) 335 | { 336 | unsigned int cpuinfo[4] = {}; 337 | 338 | /* func check */ 339 | cpuid(0x0, 0x0, cpuinfo); 340 | if (cpuinfo[EAX_IDX] < func) 341 | return 0; 342 | 343 | cpuid(func, 0x0, cpuinfo); 344 | 345 | return (cpuinfo[reg] & bit) != 0; 346 | } 347 | 348 | static int clflushopt_avail(void) 349 | { 350 | return cpuid_check(CLFLUSHOPT_FUNC, EBX_IDX, CLFLUSHOPT_BIT); 351 | } 352 | 353 | static int clwb_avail(void) 354 | { 355 | return cpuid_check(CLWB_FUNC, EBX_IDX, CLWB_BIT); 356 | } 357 | 358 | /* Old processors don't support clflushopt/clwb, so we default to clflush */ 359 | void (*cl_flush_opt)(void *buf, uint32_t len) = cl_flush; 360 | void (*cl_flush_wb)(void *buf, uint32_t len) = cl_flush; 361 | 362 | __attribute__((constructor)) 363 | static void clflush_init(void) 364 | { 365 | if (clwb_avail()) { 366 | 
cl_flush_wb = __cl_flush_wb; 367 | cl_flush_opt = __cl_flush_opt; 368 | } else if (clflushopt_avail()) { 369 | cl_flush_wb = __cl_flush_opt; 370 | cl_flush_opt = __cl_flush_opt; 371 | } 372 | } 373 | -------------------------------------------------------------------------------- /pa.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * pa.c - Page Allocator 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Boaz Harrosh 11 | * Sagi Manole 12 | */ 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "zus.h" 21 | #include "zuf_call.h" 22 | 23 | /* PA_SIZE - allowed data held in pages; 4G by default, setup upon zusd init */ 24 | /* TODO: get this param from FS 25 | * TODO2: grow dynamically 26 | */ 27 | #define PA_SIZE (g_pa_size.pa_size) 28 | #define MEGA (1UL << 20) 29 | 30 | union _pa_size { 31 | const size_t pa_size; 32 | size_t __pa_size_wr; 33 | }; 34 | static union _pa_size g_pa_size = { .pa_size = 1L << 32, }; /* 4GB */ 35 | 36 | int zus_setup_pa_size(size_t size) 37 | { 38 | const char *env_pa_size; 39 | size_t avail_ram; 40 | long p_size, pages; 41 | 42 | /* user-defined size */ 43 | if (size) 44 | goto out; 45 | 46 | env_pa_size = getenv(ZUFS_PA_SIZE); 47 | if (env_pa_size) { 48 | size = atol(env_pa_size); 49 | goto out; 50 | } 51 | 52 | /* defines PA_SIZE to be half of physical RAM */ 53 | p_size = sysconf(_SC_PAGE_SIZE); 54 | if (unlikely(p_size == -1)) 55 | return -errno; 56 | pages = sysconf(_SC_PHYS_PAGES); 57 | if (unlikely(pages == -1)) 58 | return -errno; 59 | 60 | avail_ram = p_size * pages; 61 | if (unlikely(avail_ram < (64 * MEGA))) 62 | return -ENOMEM; 63 | size = avail_ram / 2; 64 | 65 | out: 66 | /* require PA_SIZE to be 2M aligned */ 67 | g_pa_size.__pa_size_wr = size & ~((2 * MEGA) - 1); 68 | return 0; 69 
| } 70 | 71 | /* ~~~~ fba ~~~~ */ 72 | 73 | /* 74 | * Force fba allocations to be 2M aligned. We don't care for out-of-range pages 75 | * as they are never touched and therefore remains unallocated. 76 | */ 77 | #define FBA_ALIGNSIZE (ZUFS_ALLOC_MASK + 1) 78 | 79 | static int _fba_alloc(struct fba *fba, size_t size, int flags) 80 | { 81 | int err; 82 | 83 | /* Our buffers are allocated from a tmpfile so all is aligned and easy 84 | */ 85 | fba->fd = open("/dev/shm/", O_RDWR | O_TMPFILE | O_EXCL, 0666); 86 | if (fba->fd < 0) { 87 | if (!(flags & MAP_HUGETLB)) 88 | ERROR("Error opening <%s>: %s\n","/tmp/", 89 | strerror(errno)); 90 | return errno ? -errno : -EPERM; 91 | } 92 | 93 | err = ftruncate(fba->fd, size); 94 | if (unlikely(err)) { 95 | err = -errno; 96 | if (!(flags & MAP_HUGETLB)) 97 | ERROR("ftruncate failed size=0x%lx => %d\n", size, err); 98 | close(fba->fd); 99 | return err; 100 | } 101 | 102 | fba->ptr = mmap(NULL, size, PROT_WRITE | PROT_READ, flags, 103 | fba->fd, 0); 104 | if (fba->ptr == MAP_FAILED) { 105 | if (!(flags & MAP_HUGETLB)) 106 | ERROR("mmap failed=> %d: %s\n", errno, strerror(errno)); 107 | fba_free(fba); 108 | return errno ? 
-errno: -ENOMEM; 109 | } 110 | 111 | err = madvise(fba->ptr, size, MADV_DONTDUMP); 112 | if (err == -1 && !(flags & MAP_HUGETLB)) 113 | ERROR("madvise(DONTDUMP) failed=> %d: %s\n", errno, 114 | strerror(errno)); 115 | 116 | fba->size = size; 117 | 118 | DBG("fba allocated flags=0x%x fd=%d ptr=%p size=0x%lx\n", flags, 119 | fba->fd, fba->ptr, size); 120 | 121 | return 0; 122 | } 123 | 124 | int fba_alloc(struct fba *fba, size_t size) 125 | { 126 | int err = _fba_alloc(fba, size, MAP_SHARED); 127 | 128 | if (err) 129 | return err; 130 | 131 | if (NEED_MLOCK && size < PA_SIZE) { 132 | err = mlock(fba->ptr, size); 133 | ZUS_WARN_ON(err); 134 | } 135 | return err; 136 | } 137 | 138 | static int _fba_alloc_huge(struct fba *fba, size_t size) 139 | { 140 | int err; 141 | 142 | err = _fba_alloc(fba, size, MAP_ANONYMOUS | MAP_SHARED | MAP_HUGETLB); 143 | if (unlikely(err)) { 144 | /* fallback to 4k pages */ 145 | INFO("mmap failed huge=> %d: %s\n", errno, strerror(errno)); 146 | return _fba_alloc(fba, size, MAP_SHARED); 147 | } 148 | 149 | return 0; 150 | } 151 | 152 | int fba_alloc_align(struct fba *fba, size_t size, bool huge) 153 | { 154 | size_t aligned_size; 155 | ulong addr; 156 | int err; 157 | 158 | aligned_size = ALIGN(size + FBA_ALIGNSIZE, FBA_ALIGNSIZE); 159 | 160 | if (huge) 161 | err = _fba_alloc_huge(fba, aligned_size); 162 | else 163 | err = _fba_alloc(fba, aligned_size, MAP_SHARED); 164 | 165 | if (unlikely(err)) 166 | return err; 167 | 168 | addr = ALIGN((ulong)fba->ptr, FBA_ALIGNSIZE); 169 | if (fba->ptr != (void *)addr) { 170 | size_t start_len, end_len; 171 | 172 | DBG("fba: fd=%d mmap-addr=0x%lx addr=0x%lx msize=0x%lx aligned_size=0x%lx\n", 173 | fba->fd, (ulong)fba->ptr, addr, size, aligned_size); 174 | 175 | /* unmap the unaligned edges and fix the ptr and size */ 176 | start_len = addr - (ulong)fba->ptr; 177 | end_len = aligned_size - size - start_len; 178 | 179 | munmap(fba->ptr, start_len); 180 | munmap((void *)(addr + size), end_len); 181 | 182 | 
fba->ptr = (void *)addr; 183 | fba->size = size; 184 | } 185 | 186 | if (NEED_MLOCK && size < PA_SIZE) { 187 | err = mlock(fba->ptr, size); 188 | ZUS_WARN_ON(err); 189 | } 190 | 191 | return 0; 192 | } 193 | 194 | void fba_free(struct fba *fba) 195 | { 196 | if (fba->fd >= 0) { 197 | munmap(fba->ptr, fba->size); 198 | close(fba->fd); 199 | fba->fd = -1; 200 | } 201 | } 202 | 203 | int fba_punch_hole(struct fba *fba, ulong index, uint nump) 204 | { 205 | int ret = fallocate(fba->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 206 | md_p2o(index), md_p2o(nump)); 207 | 208 | if (unlikely(ret)) 209 | return -errno; 210 | return 0; 211 | } 212 | 213 | /* ~~~ pa - Page Allocator ~~~ */ 214 | 215 | /* 2MB worth of pages (= 32k pages) */ 216 | #define PA_PAGES_AT_A_TIME ((1UL << 21) / sizeof(struct pa_page)) 217 | 218 | static void _init_one_page(struct zus_sb_info *sbi, struct pa *pa, 219 | struct pa_page *page) 220 | { 221 | a_list_init(&page->list); 222 | a_list_add_tail(&page->list, &pa->head); 223 | pa_set_page_zone(page, POOL_NUM); 224 | page->owner = sbi; 225 | } 226 | 227 | static int _init_page_of_pages(struct zus_sb_info *sbi, struct pa *pa) 228 | { 229 | struct pa_page *page; 230 | uint i; 231 | 232 | /* Better check here before we SIG_BUS on access of data */ 233 | if (unlikely(PA_SIZE < ((pa->size + PA_PAGES_AT_A_TIME) * PAGE_SIZE))) { 234 | DBG("PA_SIZE too small pa->size=0x%lx\n", pa->size); 235 | return -ENOMEM; 236 | } 237 | 238 | page = pa->pages.ptr + pa->size * sizeof(*page); 239 | for (i = 0; i < PA_PAGES_AT_A_TIME; ++i, ++page) 240 | _init_one_page(sbi, pa, page); 241 | 242 | pa->size += PA_PAGES_AT_A_TIME; 243 | return 0; 244 | } 245 | 246 | static void _alloc_one_page(struct pa_page *page) 247 | { 248 | a_list_del_init(&page->list); 249 | page->refcount = 1; 250 | } 251 | 252 | static bool _pa_is_free(struct pa_page *page) 253 | { 254 | return (page->refcount == 0); 255 | } 256 | 257 | /* order - power of 2 of pages to allocate */ 258 | struct 
pa_page *pa_alloc_order(struct zus_sb_info *sbi, int order) 259 | { 260 | struct pa *pa = &sbi->pa[POOL_NUM]; 261 | struct pa_page *page; 262 | ushort npages = 1 << order; 263 | int err, i = 0; 264 | 265 | if (ZUS_WARN_ON(PA_MAX_ORDER < order)) 266 | return NULL; 267 | 268 | pthread_spin_lock(&pa->lock); 269 | 270 | if (a_list_empty(&pa->head)) { 271 | err = _init_page_of_pages(sbi, pa); 272 | if (unlikely(err)) { 273 | page = NULL; 274 | goto out; 275 | } 276 | } 277 | 278 | rescan: 279 | a_list_for_each_entry(page, &pa->head, list) { 280 | ulong bn = pa_page_to_bn(sbi, page); 281 | 282 | if ((bn % npages) || (bn + npages - 1 > pa->size)) 283 | continue; 284 | 285 | for (i = 1; i < npages; ++i) { 286 | if (!_pa_is_free(page + i)) 287 | break; 288 | } 289 | if (i == npages) { 290 | for (i = 0; i < npages; ++i) 291 | _alloc_one_page(page + i); 292 | goto out; 293 | } 294 | } 295 | page = NULL; 296 | err = _init_page_of_pages(sbi, pa); 297 | if (unlikely(err)) 298 | goto out; 299 | goto rescan; 300 | 301 | out: 302 | pthread_spin_unlock(&pa->lock); 303 | 304 | if (NEED_MLOCK && page) { 305 | err = mlock(pa_page_address(sbi, page), npages * PAGE_SIZE); 306 | 307 | if (unlikely(err)) { 308 | DBG("mlock failed pa=%p npages=%d => %d\n", 309 | pa_page_address(sbi, page), (int)npages, -errno); 310 | fba_punch_hole(&pa->data, pa_page_to_bn(sbi, page), 311 | npages); 312 | 313 | pthread_spin_lock(&pa->lock); 314 | for (i = 0; i < npages; ++i, ++page) 315 | a_list_add(&page->list, &pa->head); 316 | pthread_spin_unlock(&pa->lock); 317 | 318 | page = NULL; 319 | } 320 | } 321 | 322 | return page; 323 | } 324 | 325 | #define ZUS_SBI_MASK 0x7 326 | void __pa_free(struct pa_page *page) 327 | { 328 | struct zus_sb_info *sbi = (void *)((ulong)page->owner & ~ZUS_SBI_MASK); 329 | struct pa *pa = &sbi->pa[POOL_NUM]; 330 | 331 | fba_punch_hole(&pa->data, pa_page_to_bn(sbi, page), 1); 332 | 333 | pthread_spin_lock(&pa->lock); 334 | 335 | a_list_add(&page->list, &pa->head); 336 | 337 | 
pthread_spin_unlock(&pa->lock); 338 | } 339 | 340 | #define BUILD_BUG_ON_PA_KP(pa_page, pa_mem, zus_page, kmem) \ 341 | BUILD_BUG_ON((offsetof(typeof(*pa_page), pa_mem) != \ 342 | offsetof(typeof(*zus_page), kmem)) || \ 343 | (sizeof(pa_page->pa_mem) != sizeof(zus_page->kmem))) 344 | 345 | static void _require_equal(void) 346 | { 347 | const struct pa_page *pa_page = NULL; 348 | const struct zus_page *zus_page = NULL; 349 | 350 | BUILD_BUG_ON(sizeof(*pa_page) != 64); 351 | BUILD_BUG_ON(sizeof(*pa_page) != sizeof(*zus_page)); 352 | 353 | BUILD_BUG_ON_PA_KP(pa_page, flags, zus_page, flags); 354 | BUILD_BUG_ON_PA_KP(pa_page, use_count, zus_page, use_count); 355 | BUILD_BUG_ON_PA_KP(pa_page, refcount, zus_page, refcount); 356 | BUILD_BUG_ON_PA_KP(pa_page, index, zus_page, index); 357 | BUILD_BUG_ON_PA_KP(pa_page, owner, zus_page, owner); 358 | BUILD_BUG_ON_PA_KP(pa_page, list, zus_page, list_head); 359 | BUILD_BUG_ON_PA_KP(pa_page, private, zus_page, private1); 360 | BUILD_BUG_ON_PA_KP(pa_page, private2, zus_page, private2); 361 | } 362 | 363 | int pa_init(struct zus_sb_info *sbi) 364 | { 365 | struct pa *pa = &sbi->pa[POOL_NUM]; 366 | int err; 367 | 368 | _require_equal(); 369 | 370 | pa->size = 0; 371 | a_list_init(&pa->head); 372 | 373 | err = pthread_spin_init(&pa->lock, PTHREAD_PROCESS_SHARED); 374 | if (unlikely(err)) 375 | goto fail; 376 | 377 | err = fba_alloc(&pa->data, PA_SIZE); 378 | if (unlikely(err)) 379 | goto fail; 380 | 381 | err = fba_alloc(&pa->pages, (PA_SIZE / PAGE_SIZE) * 382 | sizeof(struct pa_page)); 383 | if (unlikely(err)) 384 | goto fail; 385 | 386 | return 0; 387 | 388 | fail: 389 | pa_fini(sbi); 390 | return err; 391 | } 392 | 393 | void pa_fini(struct zus_sb_info *sbi) 394 | { 395 | struct pa *pa = &sbi->pa[POOL_NUM]; 396 | struct pa_page *page; 397 | ulong free_p = 0; 398 | 399 | a_list_for_each_entry(page, &pa->head, list) { 400 | ++free_p; 401 | } 402 | if (unlikely(free_p != pa->size)) 403 | ERROR("pa leaks %lu pages\n", pa->size - 
free_p); 404 | 405 | fba_free(&pa->pages); 406 | fba_free(&pa->data); 407 | pthread_spin_destroy(&pa->lock); 408 | } 409 | -------------------------------------------------------------------------------- /pkg/create_pkg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Package script 6 | # 7 | # Copyright (C) 2019 NetApp, Inc. All rights reserved. 8 | 9 | SCRIPT_PATH="$(readlink -f ${BASH_SOURCE[0]})" 10 | SCRIPT_DIR=$(dirname $SCRIPT_PATH) 11 | PKG_TYPE=${1:-rpm} 12 | TMPDIR=$(mktemp -d) 13 | 14 | ${SCRIPT_DIR}/install.sh ${TMPDIR} ${PKG_TYPE} 15 | 16 | ZUS_DIR=${SCRIPT_DIR}/.. 17 | cd ${ZUS_DIR} 18 | GIT_HASH=$(git rev-parse HEAD) 19 | 20 | DEPENDS="-d zufs-zuf -d lsof" 21 | if [[ "${PKG_TYPE}" == "rpm" ]] ; then 22 | DEPENDS+=" -d libunwind -d libuuid -d procps-ng -d systemd" 23 | elif [[ "${PKG_TYPE}" == "deb" ]] ; then 24 | DEPENDS+=" -d libunwind8 -d libuuid1 -d procps -d systemd" 25 | fi 26 | 27 | fpm -s dir -t ${PKG_TYPE} -n zufs-zus -v ${VER} -C ${TMPDIR} \ 28 | --iteration ${BUILD_ID} --epoch 1 \ 29 | --url "netapp.com" --license "GPL/BSD" --vendor "NetApp Inc." \ 30 | --description "`printf "ZUS - Zero-copy User-mode Server\nID: ${GIT_HASH}"`" \ 31 | ${DEPENDS} --rpm-rpmbuild-define "_build_id_links none" \ 32 | --before-remove pkg/pre_uninstall.sh \ 33 | --after-remove pkg/post_uninstall.sh \ 34 | --after-install pkg/post_install.sh . 35 | 36 | rm -rf ${TMPDIR} 37 | 38 | -------------------------------------------------------------------------------- /pkg/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | SCRIPT_PATH="$(readlink -f ${BASH_SOURCE[0]})" 4 | SCRIPT_DIR=$(dirname $SCRIPT_PATH) 5 | ZUS_DIR=${SCRIPT_DIR}/.. 

# $1: staging root to install under (empty => install into the live system)
# $2: package flavour, selects the shared-library directory (default: rpm)
DEST_DIR=${1}
PKG_TYPE=${2:-rpm}

# Build products and packaging files to install.
# All path expansions below are quoted so a checkout or staging directory
# containing spaces or glob characters is not word-split.
ZUS_LIB="${ZUS_DIR}/libzus.so"
ZUS_BIN="${ZUS_DIR}/zusd"
ZUS_SERVICE_NAME=zusd.service
ZUS_SERVICE="${SCRIPT_DIR}/${ZUS_SERVICE_NAME}"
ZUS_ZUSD_HELPER_SCRIPT="${SCRIPT_DIR}/zusd.helper"
FOOFS_LIB="${ZUS_DIR}/fs/foofs/libfoofs.so"

# Destinations (all relative to DEST_DIR)
SYSTEMD_SERVICE="/lib/systemd/system/${ZUS_SERVICE_NAME}"
SYSTEMD_SERVICE_DEST="${DEST_DIR}${SYSTEMD_SERVICE}"
SYSTEMD_DEPS_DIR="${DEST_DIR}/etc/systemd/system/multi-user.target.wants"
ZUFS_LIB_DIR="${DEST_DIR}/usr/lib/zufs"
ZUFS_LOG_DIR="${DEST_DIR}/var/log/zufs"
SBIN_DIR="${DEST_DIR}/sbin"

if [[ "${PKG_TYPE}" == "rpm" ]] ; then
	LIB64_DIR="${DEST_DIR}/usr/lib64"
else
	LIB64_DIR="${DEST_DIR}/usr/lib/x86_64-linux-gnu"
fi

mkdir -p "$(dirname "${SYSTEMD_SERVICE_DEST}")" "${SYSTEMD_DEPS_DIR}" \
	"${ZUFS_LIB_DIR}" "${ZUFS_LOG_DIR}" "${SBIN_DIR}" "${LIB64_DIR}"
cp -f "${ZUS_BIN}" "${SBIN_DIR}"
cp -f "${ZUS_LIB}" "${LIB64_DIR}"
# foofs is optional: install it only when it was built
if [[ -e "${FOOFS_LIB}" ]]; then
	cp -f "${FOOFS_LIB}" "${ZUFS_LIB_DIR}"
fi
cp -f "${ZUS_ZUSD_HELPER_SCRIPT}" "${ZUFS_LIB_DIR}"
cp -f "${ZUS_SERVICE}" "${SYSTEMD_SERVICE_DEST}"
# the link target is the final (un-staged) path of the unit file
ln -sf "${SYSTEMD_SERVICE}" "${SYSTEMD_DEPS_DIR}"

# Live install only (no staging root): run the post-install step right away.
# Its failure is deliberately not fatal.
[[ -z "${DEST_DIR}" ]] && "${SCRIPT_DIR}/post_install.sh" || true

--------------------------------------------------------------------------------
/pkg/post_install.sh:
--------------------------------------------------------------------------------
#!/bin/bash

CONF=/etc/zufs.conf

# create an empty configuration on first install, keep an existing one
if [[ ! -f "${CONF}" ]] ; then
	echo "ZUFS_LIBFS_LIST=" > "${CONF}"
fi

# add libzus to ld DB
ldconfig

systemctl daemon-reload
systemctl restart zusd &>/dev/null || :

--------------------------------------------------------------------------------
/pkg/post_uninstall.sh:
--------------------------------------------------------------------------------
#!/bin/bash

ldconfig

--------------------------------------------------------------------------------
/pkg/pre_uninstall.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# package removal, not upgrade
# NOTE(review): this relies on $1 being the numeric count rpm passes to
# scriptlets; a non-numeric $1 also evaluates to 0 inside [[ -eq ]] -- confirm
# the behaviour wanted for deb packages.
if [[ ${1} -eq 0 ]] ; then
	systemctl stop zusd &>/dev/null || :
	systemctl daemon-reload
	rm -f /etc/zufs.conf
	ldconfig
fi

--------------------------------------------------------------------------------
/pkg/zusd.helper:
--------------------------------------------------------------------------------
#!/bin/bash -e
#
# SPDX-License-Identifier: BSD-3-Clause
#
# zusd shutdown script
#
# Copyright (C) 2018 NetApp, Inc. All rights reserved.
#
# See module.c for LICENSE details.
#
# Authors:
#	Yigal Korman

ZUF_ROOT=/sys/fs/zuf

# signal_apps SIGNAL MNTDEV
# Send SIGNAL to every process lsof reports as using MNTDEV.
signal_apps()
{
	local signal=$1
	local mntdev=$2
	local pids

	if pids=$(lsof -t ${mntdev}) ; then
		kill -s ${signal} ${pids}
	else
		echo "No apps founds"
	fi
}

# unload_libfs LIBFS
# For every mount whose /proc/self/mounts line matches LIBFS: stop the apps
# using it (SIGTERM, then SIGKILL), unmount it and run fsck.LIBFS on it.
unload_libfs()
{
	local libfs=$1
	local mntdevs mntdev

	mntdevs=$(grep ${libfs} /proc/self/mounts | cut -d" " -f1)

	for mntdev in ${mntdevs} ; do
		signal_apps SIGTERM ${mntdev}
		sleep 1 # wait a bit for apps to stop
		signal_apps SIGKILL ${mntdev}
		umount -v -t ${libfs} ${mntdev}
		fsck.${libfs} ${mntdev}
	done
}

# unload_zusd
# Ask zusd to terminate, wait up to about a minute, then kill it forcibly.
unload_zusd()
{
	local timeout=60
	local pids

	# send sigterm to zusd
	if pids=$(pgrep -x zusd) ; then
		kill -s SIGTERM ${pids}
	fi

	# wait for zusd to terminate
	while pids=$(pgrep -x zusd) && [[ ${timeout} -gt 1 ]] ; do
		# if 'let' returns falsy value => bash exits, so count till 1
		let timeout-=1
		sleep 1
	done

	if [[ -n ${pids} ]] ; then
		echo "Warning: Failed to stop zusd, terminating forcibly"
		kill -s SIGKILL ${pids}
	fi
}

# unload
# Tear everything down: each libfs listed in ZUFS_LIBFS_LIST, then zusd, then
# the zuf root mount and kernel module.
unload() {
	local libfs

	# ZUFS_LIBFS_LIST is comma separated; '//' turns *every* comma into a
	# space (a single '/' would only replace the first one)
	for libfs in ${ZUFS_LIBFS_LIST//,/ } ; do
		unload_libfs ${libfs}
	done

	unload_zusd

	umount -v -t zuf ${ZUF_ROOT}
	rmmod -v zuf
}

# load
# Make the zuf module available for the running kernel (linking a packaged
# build if the kernel does not know the module yet), mount the zuf root and
# exec zusd.
load()
{
	local zuf_mod=/usr/lib/zufs/extra/zuf.ko
	local kver=$(uname -r)
	local kabi=$(echo ${kver} | sed -e 's|\([0-9.]*-[0-9]*\).*|\1|')
	local destdir src_mod

	# cleanup previous loads
	if grep -qw zuf /proc/self/mounts ; then
		unload
	fi

	source /etc/os-release

	if [[ "${ID}" == "centos" || "${ID}" == "rhel" ]] ; then
		destdir=/lib/modules/${kver}/extra
	else
		destdir=/lib/modules/${kver}/kernel/fs/zuf
	fi

	if [[ ! -d ${destdir} ]] ; then
		mkdir -p ${destdir}
	fi

	# in case we booted a new kernel that isn't familiar with zuf
	# link it hoping the kernel ABI still matches
	if ! modinfo zuf >/dev/null ; then
		src_mod=$(echo ${zuf_mod%.ko}.*${kabi}*.ko)
		# the glob may match several builds; test the one that is
		# actually linked below (the first), not the whole list
		if [[ ! -f ${src_mod%% *} ]] ; then
			echo "Error: failed to find a matching zuf module for ${kabi}" >&2
			return 1
		fi
		ln -sf ${src_mod%% *} ${destdir}/zuf.ko
		depmod
	fi

	modprobe -v zuf
	mount -v -t zuf nodev ${ZUF_ROOT}

	exec /sbin/zusd ${ZUSD_ARGS}
}

case $1 in
up)
	load
	;;
down)
	unload
	;;
*)
	echo "Unknown command!" >&2
	# report the misuse to the caller instead of exiting 0
	exit 1
	;;
esac

--------------------------------------------------------------------------------
/pkg/zusd.service:
--------------------------------------------------------------------------------
# SPDX-License-Identifier: BSD-3-Clause
#
# Service unit file for the ZUFS
#
# Copyright (C) 2018 NetApp, Inc. All rights reserved.
#
# See module.c for LICENSE details.
8 | # 9 | # Authors: 10 | # Boaz Harrosh 11 | # Shachar Sharon 12 | # 13 | 14 | [Unit] 15 | Description=Zero-copy User-mode FileSystem 16 | ConditionPathExists=|/lib/modules/%v/kernel/fs/zuf/zuf.ko 17 | ConditionPathExistsGlob=|/usr/lib/zufs/extra/zuf*.ko 18 | After=local-fs.target 19 | 20 | [Service] 21 | Type=notify 22 | SyslogIdentifier=zusd 23 | EnvironmentFile=-/etc/zufs.conf 24 | LimitCORE=100M 25 | ExecStart=/usr/lib/zufs/zusd.helper up 26 | ExecStop=/usr/lib/zufs/zusd.helper down 27 | WorkingDirectory=/var/log/zufs 28 | Restart=on-failure 29 | 30 | [Install] 31 | WantedBy=multi-user.target 32 | -------------------------------------------------------------------------------- /printz.c: -------------------------------------------------------------------------------- 1 | #include "printz.h" 2 | #include "zus.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | struct module_ddbg { 14 | char name[ZUS_LIBFS_MAX_PATH]; 15 | __u16 n_dbg_entries; 16 | struct _ddebug *dbg_entries[0]; 17 | }; 18 | 19 | static struct ddbg_db { 20 | struct module_ddbg *modules[ZUS_LIBFS_MAX_NR]; 21 | __u16 mod_count; 22 | __u32 next_id; 23 | } ddbg_db = {}; 24 | 25 | static void _init_ddbg(struct _ddebug *dd, const char *modname) 26 | { 27 | char *no_path_name = strrchr(dd->filename, '/'); 28 | 29 | dd->id = ++ddbg_db.next_id; 30 | dd->modname = modname; 31 | if (no_path_name) 32 | dd->filename = no_path_name + 1; 33 | } 34 | 35 | int zus_add_module_ddbg(const char *fs_name, void *handle) 36 | { 37 | struct _ddebug *iter, *stop; 38 | size_t mddbg_sz; 39 | int i; 40 | int n_dbg; 41 | struct module_ddbg *modd; 42 | 43 | if (strlen(fs_name) >= sizeof(modd->name)) { 44 | ERROR("Name too-long fs_name=%s\n", fs_name); 45 | return -EINVAL; 46 | } 47 | 48 | iter = dlsym(handle, "__start_zus_ddbg"); 49 | if (!iter) { 50 | ERROR("Unable to get library start symbol\n"); 51 | return -EINVAL; 52 | } 53 | 54 | stop = dlsym(handle, 
"__stop_zus_ddbg"); 55 | if (!stop) { 56 | ERROR("Unable to get library start symbol\n"); 57 | return -EINVAL; 58 | } 59 | n_dbg = stop - iter; 60 | mddbg_sz = sizeof(struct module_ddbg) + 61 | n_dbg * sizeof(struct _ddebug *); 62 | ddbg_db.modules[ddbg_db.mod_count] = calloc(1, mddbg_sz); 63 | if (!ddbg_db.modules[ddbg_db.mod_count]) 64 | return -ENOMEM; 65 | modd = ddbg_db.modules[ddbg_db.mod_count]; 66 | 67 | strncpy(modd->name, fs_name, sizeof(modd->name) - 1); 68 | 69 | modd->n_dbg_entries = n_dbg; 70 | for (i = 0; i < n_dbg; ++i, ++iter) { 71 | _init_ddbg(iter, modd->name); 72 | modd->dbg_entries[i] = iter; 73 | } 74 | 75 | ++ddbg_db.mod_count; 76 | return 0; 77 | } 78 | 79 | void zus_free_ddbg_db(void) 80 | { 81 | int i; 82 | 83 | for (i = 0; i < ddbg_db.mod_count && ddbg_db.modules[i]; ++i) 84 | free(ddbg_db.modules[i]); 85 | } 86 | 87 | static void _copy_format(const char *fmt, char *buff, size_t sz) 88 | { 89 | size_t len = strlen(fmt); 90 | size_t i; 91 | size_t buff_i = 0; 92 | 93 | for (i = 0; i < len && buff_i < sz; ++i) { 94 | switch (fmt[i]) { 95 | case '\n': 96 | case '\t': 97 | if (buff_i + 2 > sz) 98 | return; 99 | *buff++ = '\\'; 100 | *buff++ = fmt[i] == '\n' ? 
'n' : 't'; 101 | buff_i += 2; 102 | break; 103 | default: 104 | *buff++ = fmt[i]; 105 | ++buff_i; 106 | } 107 | } 108 | *buff = 0; 109 | } 110 | 111 | #define MAX_FORMAT_SIZE 512 112 | 113 | int zus_ddbg_read(struct zufs_ddbg_info *zdi) 114 | { 115 | char *buff = zdi->msg; 116 | size_t buff_sz = zdi->len; 117 | struct module_ddbg *modd; 118 | struct _ddebug *ddbg; 119 | int mod_i, i; 120 | size_t buff_jmp; 121 | char format[MAX_FORMAT_SIZE + 1]; 122 | 123 | for (mod_i = 0; mod_i < ddbg_db.mod_count; ++mod_i) { 124 | modd = ddbg_db.modules[mod_i]; 125 | for (i = 0; i < modd->n_dbg_entries && buff_sz > 0; ++i) { 126 | ddbg = modd->dbg_entries[i]; 127 | if (ddbg->id <= zdi->id) 128 | continue; 129 | 130 | _copy_format(ddbg->format, format, MAX_FORMAT_SIZE); 131 | buff_jmp = 132 | snprintf(buff, buff_sz, 133 | "%s:%d [%s] %s =%s \"%s\"\n", 134 | ddbg->filename, ddbg->lineno, 135 | ddbg->modname, ddbg->function, 136 | ddbg->active ? "p" : "_", 137 | format); 138 | if (buff_jmp > buff_sz) { 139 | *buff = 0; 140 | goto out; 141 | } 142 | buff_sz -= buff_jmp; 143 | buff += buff_jmp; 144 | zdi->id = ddbg->id; 145 | } 146 | ++modd; 147 | } 148 | out: 149 | zdi->len = strlen(zdi->msg); 150 | return 0; 151 | } 152 | 153 | enum ddbg_cmd { 154 | DDBG_CMD_UNSET = 0, 155 | DDBG_CMD_ENABLE, 156 | DDBG_CMD_DISABLE, 157 | }; 158 | 159 | #define MAX_DDBG_CMD_TOKENS 9 160 | struct ddbg_ctl { 161 | char *tokens[MAX_DDBG_CMD_TOKENS]; 162 | int ntokens; 163 | 164 | const char *modname; 165 | const char *function; 166 | const char *filename; 167 | unsigned int lineno; 168 | const char *format; 169 | enum ddbg_cmd cmd; 170 | }; 171 | 172 | static int _tokenize(char *buf, struct ddbg_ctl *cmd) 173 | { 174 | while (*buf) { 175 | char *end; 176 | 177 | /* Skip leading whitespace */ 178 | for (; isspace(*buf); ++buf) 179 | ; 180 | if (!*buf) 181 | break; 182 | /* Skip comment */ 183 | if (*buf == '#') 184 | break; 185 | 186 | if (*buf == '"' || *buf == '\'') { 187 | int quote = *buf++; 188 | 
189 | for (end = buf; *end && *end != quote; end++) 190 | ; 191 | if (!*end) { 192 | ERROR("unclosed quote: %s\n", buf); 193 | return -EINVAL; /* unclosed quote */ 194 | } 195 | } else { 196 | for (end = buf; *end && !isspace(*end); end++) 197 | ; 198 | } 199 | 200 | /* `buf' is start of word, `end' is one past its end */ 201 | if (cmd->ntokens == MAX_DDBG_CMD_TOKENS) { 202 | ERROR("too many ddbg cmd tokens\n"); 203 | return -EINVAL; 204 | } 205 | if (*end) 206 | *end++ = 0; 207 | cmd->tokens[cmd->ntokens++] = buf; 208 | buf = end; 209 | } 210 | return 0; 211 | } 212 | 213 | #define NO_LINE_NUMBER ((unsigned int) -1) 214 | enum { 215 | DDBG_CRIT_MOD = 1, 216 | DDBG_CRIT_FUNC, 217 | DDBG_CRIT_FILE, 218 | DDBG_CRIT_LINENO, 219 | DDBG_CRIT_FMT, 220 | DDBG_CRIT_ENABLE, 221 | DDBG_CRIT_DISABLE, 222 | }; 223 | 224 | static int _token_type(const char *token) 225 | { 226 | if (strcmp(token, "module") == 0) 227 | return DDBG_CRIT_MOD; 228 | if (strcmp(token, "func") == 0) 229 | return DDBG_CRIT_FUNC; 230 | if (strcmp(token, "file") == 0) 231 | return DDBG_CRIT_FILE; 232 | if (strcmp(token, "line") == 0) 233 | return DDBG_CRIT_LINENO; 234 | if (strcmp(token, "format") == 0) 235 | return DDBG_CRIT_FMT; 236 | if (strcmp(token, "+p") == 0) 237 | return DDBG_CRIT_ENABLE; 238 | if (strcmp(token, "-p") == 0) 239 | return DDBG_CRIT_DISABLE; 240 | return -EINVAL; 241 | } 242 | 243 | #define REQUIRE_ADDITIONAL_TOKEN \ 244 | do { if (++i >= ddc->ntokens) return -EINVAL; } while (0) 245 | 246 | static int _parse(struct ddbg_ctl *ddc) 247 | { 248 | int i = 0; 249 | int ttype; 250 | 251 | while (i < ddc->ntokens) { 252 | ttype = _token_type(ddc->tokens[i]); 253 | switch (ttype) { 254 | case DDBG_CRIT_MOD: 255 | REQUIRE_ADDITIONAL_TOKEN; 256 | ddc->modname = ddc->tokens[i]; 257 | break; 258 | case DDBG_CRIT_FUNC: 259 | REQUIRE_ADDITIONAL_TOKEN; 260 | ddc->function = ddc->tokens[i]; 261 | break; 262 | case DDBG_CRIT_FILE: 263 | REQUIRE_ADDITIONAL_TOKEN; 264 | ddc->filename = 
ddc->tokens[i]; 265 | break; 266 | case DDBG_CRIT_LINENO: { 267 | char *leftover = NULL; 268 | 269 | REQUIRE_ADDITIONAL_TOKEN; 270 | ddc->lineno = strtoul(ddc->tokens[i], &leftover, 10); 271 | if (*leftover) /* Someone is having fun */ 272 | return -EINVAL; 273 | break; 274 | } 275 | case DDBG_CRIT_FMT: 276 | REQUIRE_ADDITIONAL_TOKEN; 277 | ddc->format = ddc->tokens[i]; 278 | break; 279 | case DDBG_CRIT_ENABLE: 280 | ddc->cmd = DDBG_CMD_ENABLE; 281 | break; 282 | case DDBG_CRIT_DISABLE: 283 | ddc->cmd = DDBG_CMD_DISABLE; 284 | break; 285 | default: 286 | ERROR("Unkonwn token %s\n", ddc->tokens[i]); 287 | return -EINVAL; 288 | } 289 | ++i; 290 | } 291 | if (ddc->cmd == DDBG_CMD_UNSET) { 292 | ERROR("no ddbg command is given\n"); 293 | return -EINVAL; 294 | } 295 | return 0; 296 | } 297 | 298 | static int _process(struct ddbg_ctl *ddc) 299 | { 300 | bool enable = ddc->cmd == DDBG_CMD_ENABLE; 301 | struct module_ddbg *modd; 302 | struct _ddebug *ddbg; 303 | int mod_i, i; 304 | 305 | for (mod_i = 0; mod_i < ddbg_db.mod_count; ++mod_i) { 306 | modd = ddbg_db.modules[mod_i]; 307 | if (ddc->modname && strcmp(ddc->modname, modd->name)) 308 | continue; 309 | for (i = 0; i < modd->n_dbg_entries; ++i) { 310 | ddbg = modd->dbg_entries[i]; 311 | if (ddc->filename && 312 | strcmp(ddc->filename, ddbg->filename)) 313 | continue; 314 | if (ddc->function && 315 | strcmp(ddc->function, ddbg->function)) 316 | continue; 317 | if (ddc->lineno != NO_LINE_NUMBER && 318 | ddc->lineno != ddbg->lineno) 319 | continue; 320 | if (ddc->format && !strstr(ddbg->format, ddc->format)) 321 | continue; 322 | ddbg->active = enable; 323 | } 324 | ++modd; 325 | } 326 | return 0; 327 | } 328 | 329 | int zus_ddbg_write(struct zufs_ddbg_info *zdi) 330 | { 331 | struct ddbg_ctl ddc = { 332 | .lineno = NO_LINE_NUMBER, 333 | .cmd = DDBG_CMD_UNSET, 334 | }; 335 | int err; 336 | 337 | err = _tokenize(zdi->msg, &ddc); 338 | if (err) 339 | return err; 340 | 341 | err = _parse(&ddc); 342 | if (err) 343 | return 
/*
 * One dynamic-debug call site. An instance is emitted into the "zus_ddbg"
 * section by DEFINE_DYNAMIC_DEBUG_METADATA for every pr_debug()/dyn_dbg_pr()
 * use.
 */
struct _ddebug {
	/*
	 * Entry identifier; zus_ddbg_read() uses it as a resume cursor
	 * (entries with id <= the caller's last seen id are skipped).
	 * Not set by the metadata macro -- presumably assigned when the
	 * module's entries are registered; TODO confirm in _init_ddbg().
	 */
	__u32 id;
	/* Owning module name, printed as the message prefix by dyn_dbg_pr() */
	const char *modname;
	const char *function;	/* __func__ of the call site */
	const char *filename;	/* __FILE__ of the call site */
	unsigned int lineno;	/* __LINE__ of the call site */
	const char *format;	/* the printf format string of the message */
	bool active;		/* message is printed only while this is true */
} __attribute__((aligned(8)));
dyn_dbg_pr(s, ## args) 61 | 62 | #endif /* define ___PRINTZ_H__ */ 63 | -------------------------------------------------------------------------------- /signals.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * signals.c -- Zusd signal handling 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | #define _GNU_SOURCE 13 | 14 | #include 15 | #include 16 | 17 | #include "zus.h" 18 | #include "zusd.h" 19 | 20 | 21 | static void _sigaction_sigbus_handler(int signum, siginfo_t *si, void *p) 22 | { 23 | INFO("SIGNAL: signum=%d si_errno=%d\n", signum, si->si_errno); 24 | ERROR("WARNING: check system LOGS for memory errors and/or MCE.\n" 25 | "In case of \"Uncorrectable Memory Error\", check filesystem manual\n"); 26 | //TODO: call to filesytem(s) memory-error-handling fuction. 27 | abort(); 28 | } 29 | 30 | static void _sigaction_info_handler(int signum, siginfo_t *si, void *p) 31 | { 32 | DBG("SIGNAL: signum=%d si_errno=%d\n", signum, si->si_errno); 33 | } 34 | 35 | static void _sigaction_exit_handler(int signum, siginfo_t *si, void *p) 36 | { 37 | _sigaction_info_handler(signum, si, p); 38 | zus_mount_thread_stop(); 39 | exit(signum == SIGTERM ? 
/*
 * zus_register_sigactions - install the daemon's signal dispositions.
 *
 * Each signal is routed to one of four handler families defined above:
 *   info   - log the signal and carry on
 *   exit   - log, stop the mount thread and exit()
 *   abort  - dump the stack and abort()
 *   sigbus - log a memory-error hint and abort()
 */
void zus_register_sigactions(void)
{
	/*
	 * IMPORTANT: do not catch SIGABRT -- let abort work as expected from
	 * within _sigaction_abort_handler
	 *
	 * SIGKILL and SIGSTOP are deliberately absent: the system does not
	 * allow their disposition to be changed, sigaction() always fails
	 * with EINVAL for them, so registering a handler is a no-op that
	 * only misleads the reader.
	 */
	_sigaction_info(SIGHUP);
	_sigaction_exit(SIGINT);
	_sigaction_exit(SIGQUIT);
	_sigaction_abort(SIGILL);
	_sigaction_info(SIGTRAP);
	_sigaction_sigbus(SIGBUS);
	_sigaction_abort(SIGFPE);
	_sigaction_exit(SIGUSR1);
	_sigaction_abort(SIGSEGV);
	_sigaction_info(SIGUSR2);
	_sigaction_info(SIGPIPE);
	_sigaction_info(SIGALRM);
	_sigaction_exit(SIGTERM);
	_sigaction_abort(SIGSTKFLT);
	_sigaction_info(SIGCHLD); /* TODO: Maybe exit? */
	_sigaction_info(SIGCONT);
	_sigaction_exit(SIGTSTP);
	_sigaction_exit(SIGTTIN);
	_sigaction_exit(SIGTTOU);
	_sigaction_info(SIGURG);
	_sigaction_exit(SIGXCPU);
	_sigaction_exit(SIGXFSZ);
	_sigaction_exit(SIGVTALRM);
	_sigaction_info(SIGPROF);
	_sigaction_info(SIGWINCH);
	_sigaction_info(SIGIO);
	_sigaction_exit(SIGPWR);
	_sigaction_exit(SIGSYS);
}
/*
 * Per-CPU slab: one free-list per power-of-two size class.
 * list[i] holds elements of (1 << (i + ZUS_MIN_SLAB_SHIFT)) bytes, carved
 * out of whole pages obtained from the page allocator of @sbi.
 */
struct zus_slab {
	/* super-block whose page allocator backs this slab */
	struct zus_sb_info *sbi;
	struct zus_slab_list {
		/* free elements of this size class */
		struct a_list_head head;
		int nused; /* number elements currently owned by user */
		int nfree; /* number of elements currently in free-list */
		/*
		 * threshold of min wanted elements in list: a fully-free page
		 * is handed back only once nfree has reached this value
		 */
		int nfree_want;
		int _pad;
	} list[ZUS_SLAB_LISTS];
	/* index of this slab; stamped into every page it carves up */
	int cpu;
	/* taken via _slab_lock()/_slab_trylock() around list manipulation */
	pthread_spinlock_t lock;
} __aligned(64);
static int _page_slab_index(struct pa_page *page) 94 | { 95 | return _pa_page_meta(page) - 1; 96 | } 97 | 98 | static int _page_slab_cpu(const struct pa_page *page) 99 | { 100 | return page->sinfo.slab_cpu; 101 | } 102 | 103 | static void _page_slab_uc_inc(struct pa_page *page) 104 | { 105 | page->sinfo.slab_uc++; 106 | } 107 | 108 | static int _page_slab_uc_dec(struct pa_page *page) 109 | { 110 | return --page->sinfo.slab_uc; 111 | } 112 | 113 | /* ~~~~~ SLAB init ~~~~~ */ 114 | 115 | static void _slab_init(struct zus_slab *slab, struct zus_sb_info *sbi, int cpu) 116 | { 117 | size_t i; 118 | 119 | ZUS_BUILD_BUG_ON(sizeof(struct zus_slab_elem) != ZUS_MIN_SLAB_SIZE); 120 | 121 | slab->sbi = sbi; 122 | slab->cpu = cpu; 123 | for (i = 0; i < ZUS_ARRAY_SIZE(slab->list); i++) { 124 | a_list_init(&slab->list[i].head); 125 | slab->list[i].nused = 0; 126 | slab->list[i].nfree = 0; 127 | slab->list[i].nfree_want = ZUS_SLAB_NFREE_WANT; 128 | } 129 | } 130 | 131 | static struct zus_slab_elem * 132 | _slab_alloc_elem(struct zus_slab *slab, int slab_index) 133 | { 134 | struct zus_slab_elem *se; 135 | struct zus_slab_list *slab_list = &slab->list[slab_index]; 136 | 137 | if (a_list_empty(&slab_list->head)) 138 | return NULL; 139 | 140 | se = a_list_first_entry(&slab_list->head, struct zus_slab_elem, list); 141 | a_list_del_init(&se->list); 142 | --slab_list->nfree; 143 | ++slab_list->nused; 144 | 145 | return se; 146 | } 147 | 148 | static void _slab_page_init(struct zus_slab *slab, 149 | struct pa_page *page, int slab_index) 150 | { 151 | struct zus_slab_list *slab_list = &slab->list[slab_index]; 152 | int slabs_count = ZUS_MAX_SLABS_PER_BLOCK >> slab_index; 153 | int i, step = 1 << slab_index; 154 | struct zus_slab_elem *se; 155 | 156 | _page_set_slab(page, slab_index, slab->cpu); 157 | 158 | se = pa_page_address(slab->sbi, page); 159 | for (i = 0; i < slabs_count; i++) { 160 | a_list_add_tail(&se->list, &slab_list->head); 161 | ++slab_list->nfree; 162 | se += step; 163 | } 
164 | } 165 | 166 | static void _slab_page_fini(struct zus_slab *slab, 167 | struct pa_page *page, int slab_index) 168 | { 169 | struct zus_slab_list *slab_list = &slab->list[slab_index]; 170 | int slabs_count = ZUS_MAX_SLABS_PER_BLOCK >> slab_index; 171 | int i, step = 1 << slab_index; 172 | struct zus_slab_elem *se; 173 | 174 | se = pa_page_address(slab->sbi, page); 175 | for (i = 0; i < slabs_count; i++) { 176 | a_list_del_init(&se->list); 177 | --slab_list->nfree; 178 | se += step; 179 | } 180 | _page_clear_slab(page); 181 | } 182 | 183 | /* ~~~~~ SLAB alloc ~~~~~ */ 184 | 185 | static bool _slab_size_valid(size_t size) 186 | { 187 | return (0 < size) && (size <= ZUS_BLOCK_SIZE); 188 | } 189 | 190 | static int _slab_list_index(size_t size) 191 | { 192 | int slab_index; 193 | 194 | if (unlikely(size <= ZUS_MIN_SLAB_SIZE)) 195 | return 0; 196 | 197 | slab_index = (32 - (__builtin_clz((size - 1) >> ZUS_MIN_SLAB_SHIFT))); 198 | ZUS_WARN_ON(ZUS_SLAB_LISTS <= slab_index); 199 | 200 | return slab_index; 201 | } 202 | 203 | static int _slab_check_list_index(int slab_index) 204 | { 205 | return likely((slab_index >= 0) && 206 | (slab_index < ZUS_SLAB_LISTS)) ? 
0 : -EINVAL; 207 | } 208 | 209 | static int _slab_increase(struct zus_slab *slab, int slab_index) 210 | { 211 | struct pa_page *page; 212 | 213 | page = pa_alloc(slab->sbi); 214 | if (unlikely(!page)) 215 | return -ENOMEM; 216 | 217 | _slab_page_init(slab, page, slab_index); 218 | return 0; 219 | } 220 | 221 | static bool _slab_iscold(const struct zus_slab *slab, size_t size) 222 | { 223 | int slab_index = _slab_list_index(size); 224 | const struct zus_slab_list *slab_list; 225 | 226 | if (unlikely(_slab_check_list_index(slab_index))) 227 | return false; 228 | 229 | slab_list = &slab->list[slab_index]; 230 | return !slab_list->nfree && !slab_list->nused; 231 | } 232 | 233 | static bool _slab_list_empty(struct zus_slab *slab, size_t size) 234 | { 235 | int slab_index = _slab_list_index(size); 236 | struct zus_slab_list *slab_list; 237 | 238 | if (unlikely(_slab_check_list_index(slab_index))) 239 | return true; 240 | 241 | slab_list = &slab->list[slab_index]; 242 | if (!slab_list->nfree) { 243 | ZUS_WARN_ON(!a_list_empty(&slab_list->head)); 244 | return true; 245 | } 246 | return false; 247 | } 248 | 249 | static void *_slab_alloc(struct zus_slab *slab, size_t size) 250 | { 251 | int slab_index = _slab_list_index(size); 252 | struct zus_slab_elem *se; 253 | struct pa_page *page; 254 | 255 | if (_slab_list_empty(slab, size)) { 256 | int err; 257 | 258 | err = _slab_increase(slab, slab_index); 259 | if (unlikely(err)) { 260 | DBG("failed to increase slab => %d\n", err); 261 | return NULL; 262 | } 263 | } 264 | se = _slab_alloc_elem(slab, slab_index); 265 | if (unlikely(!se)) 266 | return NULL; 267 | 268 | page = pa_virt_to_page(slab->sbi, se); 269 | _page_slab_uc_inc(page); 270 | 271 | ZUS_WARN_ON(pa_page_count(page) != 1); 272 | return se; 273 | } 274 | 275 | /* ~~~~~ SLAB free ~~~~~ */ 276 | 277 | static void _slab_free_elem(struct zus_slab_list *slab_list, 278 | struct zus_slab_elem *se) 279 | { 280 | a_list_add_tail(&se->list, &slab_list->head); 281 | 
++slab_list->nfree; 282 | --slab_list->nused; 283 | } 284 | 285 | static void __slab_free(struct zus_slab *slab, int slab_index, 286 | struct pa_page *page, void *addr) 287 | { 288 | struct zus_slab_list *slab_list = &slab->list[slab_index]; 289 | int last; 290 | 291 | _slab_free_elem(slab_list, addr); 292 | 293 | if (_page_slab_uc_dec(page)) 294 | return; 295 | 296 | if (slab_list->nfree < slab_list->nfree_want) 297 | return; 298 | 299 | _slab_page_fini(slab, page, slab_index); 300 | last = pa_put_page(page); 301 | ZUS_WARN_ON(!last); 302 | } 303 | 304 | static int _slab_free(struct zus_slab *slab, void *addr) 305 | { 306 | struct pa_page *page; 307 | int slab_index; 308 | int err = 0; 309 | 310 | _slab_lock(slab); 311 | 312 | page = pa_virt_to_page(slab->sbi, addr); 313 | slab_index = _page_slab_index(page); 314 | 315 | if (unlikely(_slab_check_list_index(slab_index))) { 316 | err = -EINVAL; 317 | goto out; 318 | } 319 | 320 | __slab_free(slab, slab_index, page, addr); 321 | 322 | out: 323 | _slab_unlock(slab); 324 | return err; 325 | } 326 | 327 | 328 | /* ~~~~~ SLAB fini ~~~~~ */ 329 | 330 | static void _slab_fini(struct zus_slab *slab) 331 | { 332 | int slab_index; 333 | 334 | for (slab_index = 0; slab_index < ZUS_SLAB_LISTS; ++slab_index) { 335 | int last; 336 | struct zus_slab_elem *se; 337 | struct pa_page *page; 338 | struct zus_slab_list *slab_list = &slab->list[slab_index]; 339 | 340 | while (!a_list_empty(&slab_list->head)) { 341 | se = a_list_first_entry(&slab_list->head, 342 | struct zus_slab_elem, list); 343 | page = pa_virt_to_page(slab->sbi, se); 344 | if (ZUS_WARN_ON(page->sinfo.slab_uc)) { 345 | ERROR("Slab-Leak! 
uc=%d\n", page->sinfo.slab_uc); 346 | break; 347 | } 348 | 349 | _slab_page_fini(slab, page, slab_index); 350 | last = pa_put_page(page); 351 | ZUS_WARN_ON(!last); 352 | } 353 | slab->list[slab_index].nused = 0; 354 | slab->list[slab_index].nfree_want = 0; 355 | } 356 | 357 | slab->cpu = 0; 358 | slab->sbi = NULL; 359 | } 360 | 361 | /* ~~~~~ global volatile-memory SLAB allocator ~~~~~ */ 362 | 363 | struct zus_global_slab_allocator { 364 | struct zus_sb_info sbi; 365 | int nslabs; 366 | struct zus_slab slab[1]; /* at least one CPU */ 367 | }; 368 | 369 | static struct zus_global_slab_allocator *g_gsa = NULL; 370 | 371 | 372 | /* TODO: move to pa? */ 373 | static bool __pa_addr_inrange(struct zus_sb_info *sbi, void *addr) 374 | { 375 | struct pa *pa = &sbi->pa[POOL_NUM]; 376 | 377 | return ((pa->data.ptr <= addr) && 378 | (addr < (pa->data.ptr + pa->size * PAGE_SIZE))); 379 | } 380 | 381 | static int _zus_gsa_cpu_of(void *ptr) 382 | { 383 | long addr = (long)ptr; 384 | const struct pa_page *page; 385 | 386 | if (unlikely(!addr)) 387 | return -1; 388 | 389 | if (unlikely(addr & ((1 << ZUS_MIN_SLAB_SHIFT) - 1))) 390 | return -1; 391 | 392 | if (!__pa_addr_inrange(&g_gsa->sbi, ptr)) 393 | return -1; 394 | 395 | page = pa_virt_to_page(&g_gsa->sbi, ptr); 396 | return _page_slab_cpu(page); 397 | } 398 | 399 | static struct zus_slab *_zus_gsa_sslab_at(int cpu, int index) 400 | { 401 | return &g_gsa->slab[(cpu + index) % g_gsa->nslabs]; 402 | } 403 | 404 | static struct zus_slab *_slab_of_cpu(int cpu) 405 | { 406 | if (unlikely((cpu < 0) || (g_gsa->nslabs <= cpu))) 407 | return NULL; 408 | 409 | return &g_gsa->slab[cpu]; 410 | } 411 | 412 | static void *_zus_gsa_malloc(size_t size) 413 | { 414 | int i, cpu = zus_current_cpu_silent(); 415 | struct zus_slab *slab; 416 | void *ptr; 417 | 418 | slab = _slab_of_cpu(cpu); 419 | if (unlikely(_slab_iscold(slab, size))) 420 | goto out; 421 | 422 | for (i = 0; i < g_gsa->nslabs; ++i) { 423 | if (_slab_trylock(slab)) { 424 | if 
(!_slab_list_empty(slab, size)) { 425 | ptr = _slab_alloc(slab, size); 426 | 427 | _slab_unlock(slab); 428 | 429 | return ptr; 430 | } 431 | _slab_unlock(slab); 432 | } 433 | slab = _zus_gsa_sslab_at(cpu, i + 1); 434 | } 435 | out: 436 | _slab_lock(slab); 437 | 438 | ptr = _slab_alloc(slab, size); 439 | 440 | _slab_unlock(slab); 441 | 442 | return ptr; 443 | } 444 | 445 | static void _zus_gsa_free(void *ptr) 446 | { 447 | int cpu = _zus_gsa_cpu_of(ptr); 448 | 449 | if (ZUS_WARN_ON(cpu < 0)) 450 | return; 451 | 452 | _slab_free(_slab_of_cpu(cpu), ptr); 453 | } 454 | 455 | static size_t __elem_size(void *addr) 456 | { 457 | struct zus_slab *slab = &g_gsa->slab[0]; 458 | struct pa_page *page = pa_virt_to_page(slab->sbi, addr); 459 | int slab_index = _page_slab_index(page); 460 | 461 | if (unlikely(_slab_check_list_index(slab_index))) 462 | return -EINVAL; 463 | 464 | return 1 << (slab_index + ZUS_MIN_SLAB_SHIFT); 465 | } 466 | 467 | /* ~~~~~ malloc/free wrappers ~~~~~ */ 468 | 469 | void *zus_malloc(size_t size) 470 | { 471 | void *ptr; 472 | 473 | if (unlikely(!g_gsa)) 474 | return NULL; 475 | 476 | if (unlikely(!size)) 477 | return NULL; 478 | 479 | if (!_slab_size_valid(size)) 480 | return malloc(size); 481 | 482 | ptr = _zus_gsa_malloc(size); 483 | if (unlikely(!ptr)) { 484 | /* TODO(sagi): remove this code once we move to 128MB chunks */ 485 | int err; 486 | 487 | /* exhausted slab: fallback to malloc + force cache-line align */ 488 | err = posix_memalign(&ptr, CACHELINE_SIZE, size); 489 | if (unlikely(err)) 490 | return NULL; 491 | } 492 | 493 | return ptr; 494 | } 495 | 496 | void zus_free(void *ptr) 497 | { 498 | if (unlikely(!g_gsa)) 499 | return; 500 | 501 | if (unlikely(!ptr)) 502 | return; 503 | 504 | if (_zus_gsa_cpu_of(ptr) < 0) { 505 | free(ptr); 506 | return; 507 | } 508 | _zus_gsa_free(ptr); 509 | } 510 | 511 | void *zus_calloc(size_t nmemb, size_t elemsz) 512 | { 513 | size_t size = nmemb * elemsz; 514 | void *ptr; 515 | 516 | if (unlikely(!g_gsa)) 
517 | return NULL; 518 | 519 | if (!_slab_size_valid(size)) 520 | return calloc(nmemb, elemsz); 521 | 522 | ptr = zus_malloc(size); 523 | if (unlikely(!ptr)) 524 | return NULL; 525 | 526 | memset(ptr, 0, size); 527 | 528 | return ptr; 529 | } 530 | 531 | void *zus_realloc(void *ptr, size_t size) 532 | { 533 | void *newptr; 534 | 535 | if (unlikely(!g_gsa)) 536 | return NULL; 537 | 538 | if (unlikely(!ptr)) 539 | return zus_malloc(size); 540 | 541 | if (unlikely(!size)) { 542 | zus_free(ptr); 543 | return NULL; 544 | } 545 | 546 | if (_zus_gsa_cpu_of(ptr) < 0) { 547 | if (!_slab_size_valid(size)) 548 | return realloc(ptr, size); 549 | } else { 550 | if (size <= __elem_size(ptr)) 551 | return ptr; 552 | } 553 | 554 | newptr = zus_malloc(size); 555 | if (unlikely(!newptr)) 556 | return NULL; 557 | 558 | memcpy(newptr, ptr, __elem_size(ptr)); 559 | zus_free(ptr); 560 | 561 | return newptr; 562 | } 563 | 564 | struct pa_page *zus_alloc_page(int mask) 565 | { 566 | void *ptr = zus_malloc(PAGE_SIZE); 567 | 568 | if (unlikely(!ptr)) 569 | return NULL; 570 | if (mask & ZUS_ZERO) 571 | memset(ptr, 0, PAGE_SIZE); 572 | return zus_virt_to_page(ptr); 573 | } 574 | 575 | void zus_free_page(struct pa_page *page) 576 | { 577 | int cpu = _page_slab_cpu(page); 578 | struct zus_slab *slab; 579 | 580 | if (ZUS_WARN_ON(cpu < 0)) 581 | return; 582 | 583 | slab = _slab_of_cpu(cpu); 584 | _slab_free(slab, pa_page_address(slab->sbi, page)); 585 | } 586 | 587 | void *zus_page_address(struct pa_page *page) 588 | { 589 | return pa_page_address(&g_gsa->sbi, page); 590 | } 591 | 592 | void *zus_virt_to_page(void *addr) 593 | { 594 | return pa_virt_to_page(&g_gsa->sbi, addr); 595 | } 596 | 597 | struct zus_sb_info *zus_global_sbi(void) 598 | { 599 | return likely(g_gsa) ? 
&g_gsa->sbi : NULL; 600 | } 601 | 602 | /* ~~~~~ init global-allocator ~~~~~ */ 603 | 604 | int zus_slab_init(void) 605 | { 606 | struct zus_global_slab_allocator *gsa; 607 | size_t size; 608 | int err, cpu, nprocs, pshared = PTHREAD_PROCESS_SHARED; 609 | 610 | if (unlikely(g_gsa)) 611 | return -EINVAL; 612 | 613 | nprocs = get_nprocs_conf(); 614 | size = sizeof(*gsa) + (nprocs - 1) * sizeof(gsa->slab[0]); 615 | err = posix_memalign((void *)&gsa, 64, size); 616 | if (unlikely(err)) { 617 | ERROR("posix_memalign failed: nprocs=%d size=0x%lx => %d\n", 618 | nprocs, size, -errno); 619 | return err; 620 | } 621 | memset(gsa, 0, size); 622 | 623 | gsa->nslabs = nprocs; 624 | for (cpu = 0; cpu < nprocs; ++cpu) { 625 | err = pthread_spin_init(&gsa->slab[cpu].lock, pshared); 626 | if (unlikely(err)) { 627 | ERROR("pthread_spin_init => %d\n", err); 628 | goto fail; 629 | } 630 | _slab_init(&gsa->slab[cpu], &gsa->sbi, cpu); 631 | } 632 | err = pa_init(&gsa->sbi); 633 | if (unlikely(err)) { 634 | ERROR("pa_init => %d\n", err); 635 | goto fail; 636 | } 637 | g_gsa = gsa; 638 | return 0; 639 | 640 | fail: 641 | free(gsa); 642 | return err; 643 | } 644 | 645 | void zus_slab_fini(void) 646 | { 647 | int err, cpu; 648 | struct zus_global_slab_allocator *gsa = g_gsa; 649 | 650 | if (unlikely(!gsa)) 651 | return; 652 | 653 | g_gsa = NULL; 654 | for (cpu = 0; cpu < gsa->nslabs; ++cpu) { 655 | _slab_fini(&gsa->slab[cpu]); 656 | err = pthread_spin_destroy(&gsa->slab[cpu].lock); 657 | if (unlikely(err)) 658 | ERROR("pthread_spin_destroy => %d\n", err); 659 | } 660 | pa_fini(&gsa->sbi); 661 | free(gsa); 662 | } 663 | -------------------------------------------------------------------------------- /utils.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * utils.c - Common utilities provided to fs via libzus 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 
6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Shachar Sharon 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #define UNW_LOCAL_ONLY 21 | #include 22 | #include 23 | 24 | #define BACKTRACE_MAX 128 25 | 26 | static int _dump_backtrace(FILE *fp, bool warn) 27 | { 28 | int err, lim = BACKTRACE_MAX; 29 | char sym[256] = ""; 30 | unw_word_t ip, sp, off; 31 | unw_context_t context; 32 | unw_cursor_t cursor; 33 | const char *prefix = warn ? LOG_STR(LOG_WARNING) "zus_warn: " : 34 | LOG_STR(LOG_NOTICE) " "; 35 | 36 | err = unw_getcontext(&context); 37 | if (err != UNW_ESUCCESS) 38 | return err; 39 | 40 | err = unw_init_local(&cursor, &context); 41 | if (err != UNW_ESUCCESS) 42 | return err; 43 | 44 | while (lim-- > 0) { 45 | ip = sp = off = 0; 46 | err = unw_step(&cursor); 47 | if (err <= 0) 48 | return err; 49 | 50 | err = unw_get_reg(&cursor, UNW_REG_IP, &ip); 51 | if (err) 52 | return err; 53 | 54 | err = unw_get_reg(&cursor, UNW_REG_SP, &sp); 55 | if (err) 56 | return err; 57 | 58 | err = unw_get_proc_name(&cursor, sym, sizeof(sym) - 1, &off); 59 | if (err) 60 | return err; 61 | 62 | fprintf(fp, "%s[<%p>] 0x%lx %s+0x%lx\n", 63 | prefix, (void *)ip, (long)sp, sym, (long)off); 64 | } 65 | return 0; 66 | } 67 | 68 | static void _dump_addr2line(FILE *fp) 69 | { 70 | int i, len; 71 | void *arr[BACKTRACE_MAX]; 72 | char ptrS[2048]; 73 | char *m = ptrS; 74 | int s = sizeof(ptrS); 75 | 76 | len = unw_backtrace(arr, BACKTRACE_MAX); 77 | 78 | for (i = 0; i < len - 3; ++i) { 79 | int l; 80 | 81 | if (!(i % 5)) { 82 | l = snprintf(m, s, 83 | "\\\n "); 84 | s -= l; m += l; 85 | } 86 | l = snprintf(m, s, "%p ", arr[i + 1]); 87 | s -= l; m += l; 88 | } 89 | 90 | fprintf(fp, LOG_STR(LOG_WARNING) 91 | "zus_warn: addr2line -a -C -e %s -f -p -s %s\n", 92 | program_invocation_name, ptrS); 93 | } 94 | 95 | void zus_dump_stack(FILE *fp, bool warn, const char *fmt, ...) 
96 | { 97 | va_list args; 98 | 99 | flockfile(fp); 100 | 101 | va_start(args, fmt); 102 | vfprintf(fp, fmt, args); 103 | va_end(args); 104 | 105 | _dump_backtrace(fp, warn); 106 | if (warn) 107 | _dump_addr2line(fp); 108 | funlockfile(fp); 109 | } 110 | 111 | void zus_warn(const char *cond, const char *file, int line) 112 | { 113 | zus_dump_stack(stderr, true, LOG_STR(LOG_WARNING) "%s: %s (%s:%d)\n", 114 | __func__, cond, file, line); 115 | } 116 | 117 | void zus_bug(const char *cond, const char *file, int line) 118 | { 119 | zus_dump_stack(stderr, true, LOG_STR(LOG_ERR) "%s: %s (%s:%d)\n", 120 | __func__, cond, file, line); 121 | abort(); 122 | } 123 | 124 | #define ZUS_MAX_FILES 65536 125 | 126 | int zus_increase_max_files(void) 127 | { 128 | struct rlimit rl; 129 | int err; 130 | 131 | err = getrlimit(RLIMIT_NOFILE, &rl); 132 | if (err) { 133 | ERROR("getrlimit failed => %d", -errno); 134 | return -errno; 135 | } 136 | 137 | if (rl.rlim_cur < ZUS_MAX_FILES) 138 | rl.rlim_cur = ZUS_MAX_FILES; 139 | if (rl.rlim_max < ZUS_MAX_FILES) 140 | rl.rlim_max = ZUS_MAX_FILES; 141 | 142 | err = setrlimit(RLIMIT_NOFILE, &rl); 143 | if (err) { 144 | ERROR("setrlimit %lu/%lu failed => %d\n", rl.rlim_cur, 145 | rl.rlim_max, -errno); 146 | return -errno; 147 | } 148 | 149 | return 0; 150 | } 151 | -------------------------------------------------------------------------------- /wtz.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * wtz.h - Wait Til Zero wait object 4 | * 5 | * This is opposite of a semaphore. It arms the object with a count 6 | * and only the last arrival releases the waiter. Usually used 7 | * for a barrier, where main thread needs to wait for all workers 8 | * to finish a stage. 9 | * 10 | * TODO: where is the MPI true barrier where also all the workers 11 | * then wait for the barrier to finish? 
#ifndef __WTZ_H__
#define __WTZ_H__

#include <errno.h>
#include <semaphore.h>

/*
 * Wait-til-zero object: a counter of outstanding "arms" plus a semaphore
 * that is posted by whoever brings the counter from 1 down to 0.
 */
struct wait_til_zero {
	int acnt;	/* outstanding arms; accessed with __atomic builtins */
	sem_t sem;	/* posted once by the last wtz_release() */
};

/* Reset the counter to zero and create the (process-private) semaphore. */
static inline void wtz_init(struct wait_til_zero *wtz)
{
	__atomic_store_n(&wtz->acnt, 0, __ATOMIC_RELAXED);
	sem_init(&wtz->sem, 0, 0);
}

/*
 * Add @c expected arrivals.
 * Returns the counter value from before the addition.
 */
static inline int wtz_arm(struct wait_til_zero *wtz, int c)
{
	return __atomic_fetch_add(&wtz->acnt, c, __ATOMIC_RELAXED);
}

/*
 * Release one at a time sorry ;-)
 *
 * Signals one arrival; the arrival that drops the counter to zero wakes the
 * waiter. Returns the number of arrivals still outstanding.
 *
 * The decrement is acquire+release so that everything an earlier arrival
 * wrote before releasing is visible to the last arrival, and through its
 * sem_post() to the waiter. With a relaxed decrement only the last
 * arrival's own writes were ordered before the wake-up.
 */
static inline int wtz_release(struct wait_til_zero *wtz)
{
	int prev = __atomic_fetch_sub(&wtz->acnt, 1, __ATOMIC_ACQ_REL);

	if (prev == 1)
		sem_post(&wtz->sem);

	return prev - 1;
}

/*
 * Wait all arms are released.
 *
 * sem_wait() returns early with EINTR when a signal handler runs in the
 * waiting thread; retry so that a stray signal cannot end the wait before
 * the last arrival has posted.
 */
static inline void wtz_wait(struct wait_til_zero *wtz)
{
	while (sem_wait(&wtz->sem) == -1 && errno == EINTR)
		;
}

#endif /* define __WTZ_H__ */
8 | * 9 | * Authors: 10 | * Boaz Harrosh 11 | */ 12 | 13 | #ifndef _GNU_SOURCE 14 | #define _GNU_SOURCE 15 | #endif 16 | 17 | #include 18 | #include 19 | 20 | #include "zus.h" 21 | 22 | /* Just a wraper for the commom unexpected print */ 23 | static inline 24 | int __ioctl(int fd, ulong zu_vect, struct zufs_ioc_hdr *hdr, const char *msg) 25 | { 26 | int ret; 27 | 28 | ret = ioctl(fd, zu_vect, hdr); 29 | if (ret) { 30 | ERROR("Unexpected ioctl => %d errno=%d zu_n=%lx zu_s=%s hdr=%d\n", 31 | ret, errno, zu_vect, msg, hdr->err); 32 | return -errno; 33 | } 34 | 35 | return hdr->err; 36 | } 37 | 38 | #define _ioctl(fd, zu_vect, hdr) __ioctl(fd, zu_vect, hdr, #zu_vect) 39 | 40 | static inline 41 | int zuf_register_fs(int fd, struct zus_fs_info *zfi) 42 | { 43 | struct zufs_ioc_register_fs zirf = { 44 | .zus_zfi = zfi, 45 | .rfi = zfi->rfi, 46 | }; 47 | 48 | return _ioctl(fd, ZU_IOC_REGISTER_FS, &zirf.hdr); 49 | } 50 | 51 | static inline 52 | int zuf_recieve_mount(int fd, struct zufs_ioc_mount *zim) 53 | { 54 | return _ioctl(fd, ZU_IOC_MOUNT, &zim->hdr); 55 | } 56 | 57 | static inline 58 | int zuf_numa_map(int fd, struct zufs_ioc_numa_map *zinm) 59 | { 60 | return _ioctl(fd, ZU_IOC_NUMA_MAP, &zinm->hdr); 61 | } 62 | 63 | static inline 64 | int zuf_grab_pmem(int fd, __u64 sb_id, struct zufs_ioc_pmem *zip) 65 | { 66 | zip->sb_id = sb_id; 67 | return _ioctl(fd, ZU_IOC_GRAB_PMEM, &zip->hdr); 68 | } 69 | 70 | static inline 71 | int zuf_zt_init(int fd, int cpu_num, uint chan, uint max_command) 72 | { 73 | struct zufs_ioc_init zii = { 74 | .channel_no = chan, 75 | .max_command = max_command, 76 | }; 77 | 78 | return _ioctl(fd, ZU_IOC_INIT_THREAD, &zii.hdr); 79 | } 80 | 81 | static inline 82 | int zuf_wait_opt(int fd, struct zufs_ioc_wait_operation *opt /*OUT*/) 83 | { 84 | return _ioctl(fd, ZU_IOC_WAIT_OPT, &opt->hdr); 85 | } 86 | 87 | static inline 88 | int zuf_break_all(int fd) 89 | { 90 | struct zufs_ioc_break_all zba = {}; 91 | 92 | return _ioctl(fd, ZU_IOC_BREAK_ALL, 
&zba.hdr); 93 | } 94 | 95 | static inline 96 | int zuf_iomap_exec(int fd, struct zufs_ioc_iomap_exec *ziome) 97 | { 98 | return _ioctl(fd, ZU_IOC_IOMAP_EXEC, &ziome->hdr); 99 | } 100 | 101 | static inline 102 | int zuf_private_mount(int fd, struct zufs_ioc_mount_private *zip) 103 | { 104 | zip->is_umount = false; 105 | return _ioctl(fd, ZU_IOC_PRIVATE_MOUNT, &zip->hdr); 106 | } 107 | 108 | static inline 109 | int zuf_private_umount(int fd, struct zufs_ioc_mount_private *zip) 110 | { 111 | zip->is_umount = true; 112 | return _ioctl(fd, ZU_IOC_PRIVATE_MOUNT, &zip->hdr); 113 | } 114 | -------------------------------------------------------------------------------- /zus-vfs.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * zus-vfs.c - Abstract FS interface that calls into the um-FS 4 | * 5 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 6 | * 7 | * See module.c for LICENSE details. 8 | * 9 | * Authors: 10 | * Boaz Harrosh 11 | */ 12 | 13 | #define _GNU_SOURCE 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include "zus.h" 20 | #include "zuf_call.h" 21 | #include "iom_enc.h" 22 | 23 | /* ~~~ mount stuff ~~~ */ 24 | 25 | /* TODO: Perhaps an md_shadow_size(md) */ 26 | static ulong _pmem_map_size(struct multi_devices *md) 27 | { 28 | size_t size = md_p2o(md_t1_blocks(md));\ 29 | 30 | if (unlikely(md->pmem_info.mdt.s_flags & MDT_F_SHADOW)) 31 | size += size; 32 | return size; 33 | } 34 | 35 | static int _pmem_mmap(struct multi_devices *md) 36 | { 37 | size_t size = _pmem_map_size(md); 38 | int prot = PROT_WRITE | PROT_READ; 39 | int flags = MAP_SHARED; 40 | int err; 41 | 42 | md->p_pmem_addr = mmap(NULL, size, prot, flags, md->fd, 0); 43 | if (md->p_pmem_addr == MAP_FAILED) { 44 | ERROR("mmap failed=> %d: %s\n", errno, strerror(errno)); 45 | return -(errno ?: ENOMEM); 46 | } 47 | 48 | err = madvise(md->p_pmem_addr, size, MADV_DONTDUMP); 49 | if (err == -1) 50 | 
ERROR("pmem madvise(DONTDUMP) failed=> %d: %s\n", errno, 51 | strerror(errno)); 52 | 53 | return 0; 54 | } 55 | 56 | static int _pmem_unmap(struct multi_devices *md) 57 | { 58 | size_t size = _pmem_map_size(md); 59 | int err; 60 | 61 | err = munmap(md->p_pmem_addr, size); 62 | if (err == -1) { 63 | ERROR("munmap failed=> %d: %s\n", errno, strerror(errno)); 64 | return -errno; 65 | } 66 | 67 | return 0; 68 | } 69 | 70 | static inline size_t _zpages_mmap_size(struct multi_devices *md) 71 | { 72 | /* We must map 2M aligned size so zuf will give us a 2M aligned 73 | * vm pointer. Some FSs bug if this is not so. 74 | */ 75 | return ALIGN(md_t1_blocks(md) * sizeof(struct zus_page), ZUFS_2M_SIZE); 76 | } 77 | 78 | static int _zpages_mmap(struct multi_devices *md) 79 | { 80 | size_t size = _zpages_mmap_size(md); 81 | size_t pmem_size = _pmem_map_size(md); 82 | int prot = PROT_WRITE | PROT_READ; 83 | int flags = MAP_SHARED; 84 | 85 | md->pages = mmap(NULL, size, prot, flags, md->fd, 86 | pmem_size + ZUFS_2M_SIZE); 87 | if (md->pages == MAP_FAILED) { 88 | ERROR("mmap failed=> %d: %s\n", errno, strerror(errno)); 89 | md->pages = NULL; 90 | return -(errno ?: ENOMEM); 91 | } 92 | 93 | /* NOTE: It is nice to have the pages state in the mem-dump */ 94 | 95 | return 0; 96 | } 97 | 98 | static int _zpages_unmap(struct multi_devices *md) 99 | { 100 | size_t size = _zpages_mmap_size(md); 101 | int err; 102 | 103 | if (unlikely(!md->pages)) 104 | return 0; 105 | 106 | err = munmap(md->pages, size); 107 | if (err == -1) { 108 | ERROR("munmap failed=> %d: %s\n", errno, strerror(errno)); 109 | return -errno; 110 | } 111 | 112 | return 0; 113 | } 114 | 115 | 116 | static int _pmem_grab(struct zus_sb_info *sbi, __u64 sb_id) 117 | { 118 | struct multi_devices *md = &sbi->md; 119 | int err; 120 | 121 | md->sbi = sbi; 122 | err = zuf_root_open_tmp(&md->fd); 123 | if (unlikely(err)) 124 | return err; 125 | 126 | err = zuf_grab_pmem(md->fd, sb_id, &md->pmem_info); 127 | if (unlikely(err)) 128 | 
return err; 129 | 130 | err = _pmem_mmap(md); 131 | if (unlikely(err)) 132 | return err; 133 | 134 | err = md_init_from_pmem_info(md); 135 | if (unlikely(err)) { 136 | ERROR("md_init_from_pmem_info sb_id=%llu => %d\n", sb_id, err); 137 | return err; 138 | } 139 | md->dev_index = md->pmem_info.dev_index; 140 | 141 | md->user_page_size = sbi->zfi->user_page_size; 142 | if (!md->user_page_size) 143 | return 0; /* User does not want pages */ 144 | if (ZUS_WARN_ON(md->user_page_size != sizeof(struct zus_page))) 145 | return EINVAL; 146 | 147 | err = _zpages_mmap(md); 148 | return err; 149 | } 150 | 151 | static void _pmem_ungrab(struct zus_sb_info *sbi) 152 | { 153 | /* Kernel makes free easy (close couple files) */ 154 | _zpages_unmap(&sbi->md); 155 | 156 | md_fini(&sbi->md, true); 157 | 158 | _pmem_unmap(&sbi->md); 159 | zuf_root_close(&sbi->md.fd); 160 | sbi->md.p_pmem_addr = NULL; 161 | } 162 | 163 | static void _zus_sbi_fini(struct zus_sb_info *sbi) 164 | { 165 | // zus_iput(sbi->z_root); was this done already 166 | if (sbi->zfi->op->sbi_fini) 167 | sbi->zfi->op->sbi_fini(sbi); 168 | _pmem_ungrab(sbi); 169 | sbi->zfi->op->sbi_free(sbi); 170 | } 171 | 172 | int zus_private_mount(struct zus_fs_info *zfi, const char *options, ulong flags, 173 | struct zufs_ioc_mount_private **zip_out) 174 | { 175 | struct zufs_ioc_mount_private *zip; 176 | struct zus_sb_info *sbi = NULL; 177 | int zip_len; 178 | int err; 179 | 180 | zip_len = sizeof(*zip) + strlen(options) + 1; 181 | 182 | zip = calloc(1, zip_len); 183 | if (!zip) { 184 | ERROR("failed to allocate memory\n"); 185 | return -ENOMEM; 186 | } 187 | 188 | zip->zmi.zus_zfi = zfi; 189 | 190 | err = zuf_root_open_tmp((int *)&zip->mount_fd); 191 | if (unlikely(err)) 192 | goto fail; 193 | 194 | err = zus_numa_map_init(zip->mount_fd); 195 | if (unlikely(err)) 196 | goto fail_fd; 197 | 198 | err = zus_thread_current_init(); 199 | if (unlikely(err)) 200 | goto fail_fd; 201 | 202 | sbi = zfi->op->sbi_alloc(zfi); 203 | if 
(unlikely(!sbi)) { 204 | err = -ENOMEM; 205 | goto fail_alloc; 206 | } 207 | 208 | zip->hdr.in_len = zip_len; 209 | zip->zmi.po.mount_flags = flags; 210 | zip->zmi.po.mount_options_len = strlen(options); 211 | memcpy(&zip->zmi.po.mount_options, options, 212 | zip->zmi.po.mount_options_len); 213 | memcpy(&zip->rfi, &zfi->rfi, sizeof(struct register_fs_info)); 214 | 215 | err = zuf_private_mount(zip->mount_fd, zip); 216 | if (unlikely(err)) 217 | goto fail_mount; 218 | 219 | sbi->zfi = zip->zmi.zus_zfi; 220 | sbi->kern_sb_id = zip->zmi.sb_id; 221 | err = _pmem_grab(sbi, zip->zmi.sb_id); 222 | if (unlikely(err)) 223 | goto fail_grab; 224 | 225 | err = sbi->zfi->op->sbi_init(sbi, &zip->zmi); 226 | if (unlikely(err)) { 227 | zus_sbi_set_flag(sbi, ZUS_SBIF_ERROR); 228 | goto fail_sbi_init; 229 | } 230 | 231 | zip->zmi.zus_sbi = sbi; 232 | zip->zmi._zi = pmem_dpp_t(md_addr_to_offset(&sbi->md, sbi->z_root->zi)); 233 | zip->zmi.zus_ii = sbi->z_root; 234 | 235 | DBG("[%lld] _zi 0x%lx zus_ii=%p\n", 236 | sbi->z_root->zi->i_ino, (ulong)zip->zmi._zi, zip->zmi.zus_ii); 237 | 238 | *zip_out = zip; 239 | 240 | return 0; 241 | 242 | fail_sbi_init: 243 | if (sbi->z_root) 244 | sbi->z_root->op->evict(sbi->z_root); 245 | if (sbi->zfi->op->sbi_fini) 246 | sbi->zfi->op->sbi_fini(sbi); 247 | _pmem_ungrab(sbi); 248 | fail_grab: 249 | zuf_private_umount(zip->mount_fd, zip); 250 | fail_mount: 251 | zfi->op->sbi_free(sbi); 252 | fail_alloc: 253 | zus_thread_current_fini(); 254 | fail_fd: 255 | close(zip->mount_fd); 256 | fail: 257 | free(zip); 258 | 259 | return err; 260 | } 261 | 262 | int zus_private_umount(struct zufs_ioc_mount_private *zip) 263 | { 264 | struct zus_sb_info *sbi = zip->zmi.zus_sbi; 265 | int err = 0; 266 | 267 | /* evict root inode (done by VFS on regular mount) */ 268 | if (sbi->z_root) 269 | sbi->z_root->op->evict(sbi->z_root); 270 | 271 | if (sbi->zfi->op->sbi_fini) 272 | err = sbi->zfi->op->sbi_fini(sbi); 273 | _pmem_ungrab(sbi); 274 | 
zuf_private_umount(zip->mount_fd, zip); 275 | sbi->zfi->op->sbi_free(sbi); 276 | zus_thread_current_fini(); 277 | close(zip->mount_fd); 278 | free(zip); 279 | return err; 280 | } 281 | 282 | int zus_mount(int fd, struct zufs_ioc_mount *zim) 283 | { 284 | struct zus_fs_info *zfi = zim->zmi.zus_zfi; 285 | struct zus_sb_info *sbi; 286 | int err; 287 | 288 | sbi = zfi->op->sbi_alloc(zfi); 289 | if (unlikely(!sbi)) { 290 | zim->hdr.err = -ENOMEM; 291 | return zim->hdr.err; 292 | } 293 | sbi->zfi = zim->zmi.zus_zfi; 294 | sbi->kern_sb_id = zim->zmi.sb_id; 295 | 296 | err = _pmem_grab(sbi, zim->zmi.sb_id); 297 | if (unlikely(err)) 298 | goto err; 299 | 300 | err = sbi->zfi->op->sbi_init(sbi, &zim->zmi); 301 | if (unlikely(err)) 302 | goto err; 303 | 304 | zim->zmi.zus_sbi = sbi; 305 | zim->zmi._zi = pmem_dpp_t(md_addr_to_offset(&sbi->md, sbi->z_root->zi)); 306 | zim->zmi.zus_ii = sbi->z_root; 307 | 308 | DBG("[%lld] _zi 0x%lx zus_ii=%p\n", 309 | sbi->z_root->zi->i_ino, (ulong)zim->zmi._zi, zim->zmi.zus_ii); 310 | 311 | return 0; 312 | err: 313 | zus_sbi_set_flag(sbi, ZUS_SBIF_ERROR); 314 | _zus_sbi_fini(sbi); 315 | zim->hdr.err = err; 316 | return err; 317 | } 318 | 319 | int zus_umount(int fd, struct zufs_ioc_mount *zim) 320 | { 321 | _zus_sbi_fini(zim->zmi.zus_sbi); 322 | return 0; 323 | } 324 | 325 | int zus_remount(int fd, struct zufs_ioc_mount *zim) 326 | { 327 | struct zus_sb_info *sbi = zim->zmi.zus_sbi; 328 | 329 | if (sbi->zfi->op->sbi_remount) 330 | return sbi->zfi->op->sbi_remount(sbi, &zim->zmi); 331 | return 0; 332 | } 333 | 334 | /* ~~~ FS operations ~~~~ */ 335 | 336 | struct zus_inode_info *zus_iget(struct zus_sb_info *sbi, ulong ino) 337 | { 338 | struct zus_inode_info *zii; 339 | int err; 340 | 341 | err = sbi->op->iget(sbi, ino, &zii); 342 | if (err) 343 | return NULL; 344 | 345 | zii->sbi = sbi; 346 | return zii; 347 | } 348 | 349 | static int _new_inode(void *app_ptr, struct zufs_ioc_hdr *hdr) 350 | { 351 | struct zufs_ioc_new_inode *ioc_new = (void 
*)hdr; 352 | struct zus_sb_info *sbi = ioc_new->dir_ii->sbi; 353 | struct zus_inode_info *zii; 354 | int err; 355 | 356 | /* In ZUS protocol we start zero ref, add_dentry increments the refs 357 | * (Kernel gave us a 1 here expect for O_TMPFILE) 358 | */ 359 | ioc_new->zi.i_nlink = 0; 360 | 361 | err = sbi->op->new_inode(sbi, app_ptr, ioc_new); 362 | if (unlikely(err)) 363 | return err; 364 | 365 | zii = ioc_new->zus_ii; 366 | ioc_new->_zi = md_addr_to_offset(&sbi->md, zii->zi); 367 | 368 | if (ioc_new->flags & ZI_TMPFILE) 369 | return 0; 370 | 371 | err = ioc_new->dir_ii->sbi->op->add_dentry(ioc_new->dir_ii, zii, 372 | &ioc_new->str); 373 | if (unlikely(err)) 374 | goto _err_free_inode; 375 | 376 | return 0; 377 | 378 | _err_free_inode: 379 | zii->sbi->op->free_inode(zii); 380 | return err; 381 | } 382 | 383 | static int _evict(struct zufs_ioc_hdr *hdr) 384 | { 385 | struct zufs_ioc_evict_inode *ziei = (void *)hdr; 386 | struct zus_inode_info *zii = ziei->zus_ii; 387 | 388 | if (unlikely(!zii)) { 389 | ERROR("!ziei->zus_ii\n"); 390 | return 0; 391 | } 392 | 393 | if (hdr->operation == ZUFS_OP_FREE_INODE) { 394 | if (likely(zii->sbi->op->free_inode)) 395 | zii->sbi->op->free_inode(zii); 396 | } else { /* ZUFS_OP_EVICT_INODE */ 397 | /* NOTE: On lookup Kernel ask's zus to allocate a new zii && 398 | * retrieve the zi, before it inserts it to inode cache, it is 399 | * possible to race, and have two threads do a lookup. The 400 | * loosing thread calls _evict(ZI_LOOKUP_RACE) to de-allocate 401 | * the extra zii. But fs->evict need not be called. Only 402 | * zii_free. 
403 | * (So it is possible at FS to see two fs->igets but one 404 | * fs->evict) 405 | */ 406 | if (zii->op->evict) 407 | zii->op->evict(zii); 408 | } 409 | return 0; 410 | } 411 | 412 | static int _lookup(struct zufs_ioc_hdr *hdr) 413 | { 414 | struct zufs_ioc_lookup *lookup = (void *)hdr; 415 | struct zufs_str *str = &lookup->str; 416 | struct zus_inode_info *zii; 417 | ulong ino; 418 | 419 | if (!str->len || !str->name[0]) { 420 | ERROR("lookup NULL string\n"); 421 | return 0; 422 | } 423 | 424 | if (0 == strncmp(".", str->name, str->len)) 425 | ino = lookup->dir_ii->zi->i_ino; 426 | else if (0 == strncmp("..", str->name, str->len)) 427 | ino = lookup->dir_ii->zi->i_dir.parent; 428 | else 429 | ino = lookup->dir_ii->sbi->op->lookup(lookup->dir_ii, str); 430 | 431 | if (!ino) { 432 | DBG("[%.*s] NOT FOUND\n", lookup->str.len, lookup->str.name); 433 | return -ENOENT; 434 | } 435 | 436 | DBG("[%.*s] ino=%ld\n", lookup->str.len, lookup->str.name, ino); 437 | zii = zus_iget(lookup->dir_ii->sbi, ino); 438 | if (unlikely(!zii)) 439 | return -ENOENT; 440 | 441 | lookup->_zi = md_addr_to_offset(&zii->sbi->md, zii->zi); 442 | lookup->zus_ii = zii; 443 | return 0; 444 | } 445 | 446 | static int _dentry(struct zufs_ioc_hdr *hdr) 447 | { 448 | struct zufs_ioc_dentry *zid = (void *)hdr; 449 | struct zus_inode_info *dir_ii = zid->zus_dir_ii; 450 | struct zus_inode_info *zii = zid->zus_ii; 451 | 452 | if (hdr->operation == ZUFS_OP_REMOVE_DENTRY) 453 | return dir_ii->sbi->op->remove_dentry(dir_ii, zii, &zid->str); 454 | 455 | return dir_ii->sbi->op->add_dentry(dir_ii, zid->zus_ii, &zid->str); 456 | } 457 | 458 | static int _rename(struct zufs_ioc_hdr *hdr) 459 | { 460 | struct zufs_ioc_rename *zir = (void *)hdr; 461 | struct zus_sb_info *sbi = zir->old_dir_ii->sbi; 462 | 463 | if (!sbi->op->rename) 464 | return -ENOTSUP; 465 | 466 | return sbi->op->rename(zir); 467 | } 468 | 469 | static int _readdir(void *app_ptr, struct zufs_ioc_hdr *hdr) 470 | { 471 | struct zufs_ioc_readdir 
*zir = (void *)hdr; 472 | struct zus_sb_info *sbi = zir->dir_ii->sbi; 473 | 474 | if (!sbi->op->readdir) 475 | return -ENOTSUP; 476 | 477 | return sbi->op->readdir(app_ptr, zir); 478 | } 479 | 480 | static int _clone(struct zufs_ioc_hdr *hdr) 481 | { 482 | struct zufs_ioc_clone *ioc_clone = (void *)hdr; 483 | struct zus_sb_info *sbi = ioc_clone->src_zus_ii->sbi; 484 | 485 | if (!sbi->op->clone) 486 | return -ENOTSUP; 487 | 488 | return sbi->op->clone(ioc_clone); 489 | } 490 | 491 | static int _io_read(ulong *app_ptr, struct zufs_ioc_hdr *hdr) 492 | { 493 | struct zufs_ioc_IO *io = (void *)hdr; 494 | struct zus_inode_info *zii = io->zus_ii; 495 | 496 | return zii->op->read(app_ptr, io); 497 | } 498 | 499 | static int _io_pre_read(ulong *app_ptr, struct zufs_ioc_hdr *hdr) 500 | { 501 | struct zufs_ioc_IO *io = (void *)hdr; 502 | struct zus_inode_info *zii = io->zus_ii; 503 | 504 | if (!zii->op->pre_read) 505 | return -ENOTSUP; 506 | 507 | return zii->op->pre_read(app_ptr, io); 508 | } 509 | 510 | static int _io_write(ulong *app_ptr, struct zufs_ioc_hdr *hdr) 511 | { 512 | struct zufs_ioc_IO *io = (void *)hdr; 513 | struct zus_inode_info *zii = io->zus_ii; 514 | 515 | return zii->op->write(app_ptr, io); 516 | } 517 | 518 | static int _get_put_multy(struct zufs_ioc_hdr *hdr) 519 | { 520 | struct zufs_ioc_IO *io = (void *)hdr; 521 | struct zus_inode_info *zii = io->zus_ii; 522 | 523 | if (unlikely(!zii->op->get_put_multy)) { 524 | ERROR("No get_put_multy operation set\n"); 525 | return -EIO; 526 | } 527 | 528 | return zii->op->get_put_multy(zii, io); 529 | } 530 | 531 | static int _mmap_close(struct zufs_ioc_hdr *hdr) 532 | { 533 | struct zufs_ioc_mmap_close *mmap_close = (void *)hdr; 534 | struct zus_inode_info *zii = mmap_close->zus_ii; 535 | 536 | if (unlikely(!zii->op->mmap_close)) 537 | return 0; 538 | 539 | return zii->op->mmap_close(zii, mmap_close); 540 | } 541 | 542 | static int _setattr(struct zufs_ioc_hdr *hdr) 543 | { 544 | struct zufs_ioc_attr *ioc_attr = 
(void *)hdr; 545 | struct zus_inode_info *zii = ioc_attr->zus_ii; 546 | 547 | if (!zii->op->setattr) 548 | return 0; /* This is fine no flushing needed */ 549 | 550 | return zii->op->setattr(zii, ioc_attr->zuf_attr); 551 | } 552 | 553 | static int _sync(struct zufs_ioc_hdr *hdr) 554 | { 555 | struct zufs_ioc_sync *ioc_range = (void *)hdr; 556 | struct zus_inode_info *zii = ioc_range->zus_ii; 557 | 558 | if (!zii->op->sync) 559 | return 0; /* This is fine sync not needed */ 560 | 561 | return zii->op->sync(zii, ioc_range); 562 | } 563 | 564 | static int _fallocate(struct zufs_ioc_hdr *hdr) 565 | { 566 | struct zufs_ioc_IO *ioc_IO = (void *)hdr; 567 | struct zus_inode_info *zii = ioc_IO->zus_ii; 568 | 569 | if (!zii->op->fallocate) 570 | return -ENOTSUP; 571 | 572 | return zii->op->fallocate(zii, ioc_IO); 573 | } 574 | 575 | static int _seek(struct zufs_ioc_hdr *hdr) 576 | { 577 | struct zufs_ioc_seek *ioc_seek = (void *)hdr; 578 | struct zus_inode_info *zii = ioc_seek->zus_ii; 579 | 580 | if (!zii->op->seek) 581 | return -ENOTSUP; 582 | 583 | return zii->op->seek(zii, ioc_seek); 584 | } 585 | 586 | static int _ioc_ioctl(struct zufs_ioc_hdr *hdr) 587 | { 588 | struct zufs_ioc_ioctl *ioc_ioctl = (void *)hdr; 589 | struct zus_inode_info *zii = ioc_ioctl->zus_ii; 590 | 591 | if (!zii->op->ioctl) 592 | return -ENOTTY; 593 | 594 | return zii->op->ioctl(zii, ioc_ioctl); 595 | } 596 | 597 | static int _ioc_xattr(struct zufs_ioc_hdr *hdr) 598 | { 599 | struct zufs_ioc_xattr *ioc_xattr = (void *)hdr; 600 | struct zus_inode_info *zii = ioc_xattr->zus_ii; 601 | 602 | if (hdr->operation == ZUFS_OP_XATTR_GET) { 603 | if (!zii->op->getxattr) 604 | return -ENOTSUP; 605 | return zii->op->getxattr(zii, ioc_xattr); 606 | } else if (hdr->operation == ZUFS_OP_XATTR_SET) { 607 | if (!zii->op->setxattr) 608 | return -ENOTSUP; 609 | return zii->op->setxattr(zii, ioc_xattr); 610 | } else if (hdr->operation == ZUFS_OP_XATTR_LIST) { 611 | if (!zii->op->listxattr) 612 | return -ENOTSUP; 613 | 
return zii->op->listxattr(zii, ioc_xattr); 614 | } 615 | ERROR("Unknown xattr operation!\n"); 616 | return -EFAULT; 617 | } 618 | 619 | static int _statfs(struct zufs_ioc_hdr *hdr) 620 | { 621 | struct zufs_ioc_statfs *ioc_statfs = (void *)hdr; 622 | struct zus_sb_info *sbi = ioc_statfs->zus_sbi; 623 | 624 | if (!sbi->op->statfs) 625 | return -ENOTSUP; 626 | 627 | return sbi->op->statfs(sbi, ioc_statfs); 628 | } 629 | 630 | static int _fiemap(void *app_ptr, struct zufs_ioc_hdr *hdr) 631 | { 632 | struct zufs_ioc_fiemap *zif = (void *)hdr; 633 | struct zus_inode_info *zii = zif->zus_ii; 634 | 635 | if (!zii->op->fiemap) 636 | return -ENOTSUP; 637 | 638 | return zii->op->fiemap(app_ptr, zif); 639 | } 640 | 641 | static int _show_options(struct zufs_ioc_hdr *hdr) 642 | { 643 | struct zufs_ioc_mount_options *ioc_mount_options = (void *)hdr; 644 | struct zus_sb_info *sbi = ioc_mount_options->zus_sbi; 645 | 646 | if (!sbi->op->show_options) 647 | return 0; 648 | 649 | return sbi->op->show_options(sbi, ioc_mount_options); 650 | } 651 | 652 | static int _iom_done(struct zufs_ioc_hdr *hdr) 653 | { 654 | struct zufs_ioc_iomap_done *ziid = (void *)hdr; 655 | 656 | if (unlikely(!(ziid->iomd && ziid->iomd->done))) { 657 | ERROR("iom_done is %p %p\n", ziid->iomd, 658 | ziid->iomd ? 
ziid->iomd->done: NULL); 659 | return -EINVAL; 660 | } 661 | 662 | DBG("%p\n", ziid->iomd); 663 | ziid->iomd->done(ziid->iomd, hdr->err); 664 | return 0; 665 | } 666 | 667 | const char *ZUFS_OP_name(enum e_zufs_operation op) 668 | { 669 | #define CASE_ENUM_NAME(e) case e: return #e 670 | switch (op) { 671 | CASE_ENUM_NAME(ZUFS_OP_NULL); 672 | CASE_ENUM_NAME(ZUFS_OP_BREAK); 673 | CASE_ENUM_NAME(ZUFS_OP_STATFS); 674 | CASE_ENUM_NAME(ZUFS_OP_SHOW_OPTIONS); 675 | CASE_ENUM_NAME(ZUFS_OP_NEW_INODE); 676 | CASE_ENUM_NAME(ZUFS_OP_FREE_INODE); 677 | CASE_ENUM_NAME(ZUFS_OP_EVICT_INODE); 678 | CASE_ENUM_NAME(ZUFS_OP_LOOKUP); 679 | CASE_ENUM_NAME(ZUFS_OP_ADD_DENTRY); 680 | CASE_ENUM_NAME(ZUFS_OP_REMOVE_DENTRY); 681 | CASE_ENUM_NAME(ZUFS_OP_RENAME); 682 | CASE_ENUM_NAME(ZUFS_OP_READDIR); 683 | CASE_ENUM_NAME(ZUFS_OP_CLONE); 684 | CASE_ENUM_NAME(ZUFS_OP_COPY); 685 | CASE_ENUM_NAME(ZUFS_OP_READ); 686 | CASE_ENUM_NAME(ZUFS_OP_PRE_READ); 687 | CASE_ENUM_NAME(ZUFS_OP_WRITE); 688 | CASE_ENUM_NAME(ZUFS_OP_MMAP_CLOSE); 689 | CASE_ENUM_NAME(ZUFS_OP_SETATTR); 690 | CASE_ENUM_NAME(ZUFS_OP_SYNC); 691 | CASE_ENUM_NAME(ZUFS_OP_FALLOCATE); 692 | CASE_ENUM_NAME(ZUFS_OP_LLSEEK); 693 | CASE_ENUM_NAME(ZUFS_OP_IOCTL); 694 | CASE_ENUM_NAME(ZUFS_OP_XATTR_GET); 695 | CASE_ENUM_NAME(ZUFS_OP_XATTR_SET); 696 | CASE_ENUM_NAME(ZUFS_OP_XATTR_LIST); 697 | CASE_ENUM_NAME(ZUFS_OP_FIEMAP); 698 | CASE_ENUM_NAME(ZUFS_OP_GET_MULTY); 699 | CASE_ENUM_NAME(ZUFS_OP_PUT_MULTY); 700 | CASE_ENUM_NAME(ZUFS_OP_NOOP); 701 | CASE_ENUM_NAME(ZUFS_OP_IOM_DONE); 702 | CASE_ENUM_NAME(ZUFS_OP_MAX_OPT); 703 | default: 704 | return "UNKNOWN"; 705 | } 706 | } 707 | 708 | static void _some_pigy_put(struct zufs_ioc_hdr *hdr) 709 | { 710 | while (hdr->flags & ZUFS_H_HAS_PIGY_PUT) { 711 | /* Kernel made sure to update hdr->in_len including the 712 | * iom_n. Kernel also checks bounds. 
713 | */ 714 | hdr = (void *)hdr + hdr->in_len; 715 | 716 | if (unlikely(hdr->operation != ZUFS_OP_PUT_MULTY)) { 717 | ERROR("Not yet, easily support pigy ANY operation(%s)\n", 718 | ZUFS_OP_name(hdr->operation)); 719 | break; 720 | } 721 | _get_put_multy(hdr); 722 | } 723 | } 724 | 725 | int zus_do_command(void *app_ptr, struct zufs_ioc_hdr *hdr) 726 | { 727 | DBG("[%s] OP=%d off=0x%x len=0x%x\n", ZUFS_OP_name(hdr->operation), 728 | hdr->operation, hdr->offset, hdr->len); 729 | 730 | if (hdr->flags & ZUFS_H_HAS_PIGY_PUT) 731 | _some_pigy_put(hdr); 732 | 733 | switch (hdr->operation) { 734 | case ZUFS_OP_NEW_INODE: 735 | return _new_inode(app_ptr, hdr); 736 | case ZUFS_OP_FREE_INODE: 737 | case ZUFS_OP_EVICT_INODE: 738 | return _evict(hdr); 739 | case ZUFS_OP_LOOKUP: 740 | return _lookup(hdr); 741 | case ZUFS_OP_ADD_DENTRY: 742 | case ZUFS_OP_REMOVE_DENTRY: 743 | return _dentry(hdr); 744 | case ZUFS_OP_RENAME: 745 | return _rename(hdr); 746 | case ZUFS_OP_READDIR: 747 | return _readdir(app_ptr, hdr); 748 | case ZUFS_OP_CLONE: 749 | case ZUFS_OP_COPY: 750 | return _clone(hdr); 751 | case ZUFS_OP_READ: 752 | return _io_read(app_ptr, hdr); 753 | case ZUFS_OP_PRE_READ: 754 | return _io_pre_read(app_ptr, hdr); 755 | case ZUFS_OP_WRITE: 756 | return _io_write(app_ptr, hdr); 757 | case ZUFS_OP_MMAP_CLOSE: 758 | return _mmap_close(hdr); 759 | case ZUFS_OP_SETATTR: 760 | return _setattr(hdr); 761 | case ZUFS_OP_SYNC: 762 | return _sync(hdr); 763 | case ZUFS_OP_FALLOCATE: 764 | return _fallocate(hdr); 765 | case ZUFS_OP_LLSEEK: 766 | return _seek(hdr); 767 | case ZUFS_OP_IOCTL: 768 | return _ioc_ioctl(hdr); 769 | case ZUFS_OP_XATTR_GET: 770 | case ZUFS_OP_XATTR_SET: 771 | case ZUFS_OP_XATTR_LIST: 772 | return _ioc_xattr(hdr); 773 | case ZUFS_OP_STATFS: 774 | return _statfs(hdr); 775 | case ZUFS_OP_FIEMAP: 776 | return _fiemap(app_ptr, hdr); 777 | case ZUFS_OP_SHOW_OPTIONS: 778 | return _show_options(hdr); 779 | 780 | case ZUFS_OP_GET_MULTY: 781 | case ZUFS_OP_PUT_MULTY: 782 
| return _get_put_multy(hdr); 783 | case ZUFS_OP_IOM_DONE: 784 | return _iom_done(hdr); 785 | 786 | case ZUFS_OP_NOOP: 787 | case ZUFS_OP_BREAK: 788 | break; 789 | default: 790 | ERROR("Unknown OP=%d\n", hdr->operation); 791 | } 792 | 793 | return 0; 794 | } 795 | -------------------------------------------------------------------------------- /zus_ddbg.ld: -------------------------------------------------------------------------------- 1 | SECTIONS 2 | { 3 | zus_ddbg : { 4 | __start_zus_ddbg = .; 5 | *(zus_ddbg*) 6 | __stop_zus_ddbg = .; 7 | } 8 | } 9 | INSERT AFTER .text; 10 | -------------------------------------------------------------------------------- /zusd.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause */ 2 | /* 3 | * Copyright (c) 2018 NetApp, Inc. All rights reserved. 4 | * 5 | * See module.c for LICENSE details. 6 | * 7 | * Authors: 8 | * Boaz Harrosh 9 | */ 10 | #include 11 | 12 | #include "zus.h" 13 | 14 | int zus_mount_thread_start(struct zus_thread_params *tp, const char *zuf_path); 15 | void zus_mount_thread_stop(void); 16 | void zus_join(void); 17 | void zus_register_sigactions(void); 18 | -------------------------------------------------------------------------------- /zusd.mk: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # 3 | # Makefile for the zus user-mode application 4 | # 5 | # Copyright (C) 2018 NetApp, Inc. All rights reserved. 6 | # 7 | # See module.c for LICENSE details. 8 | # 9 | # Authors: 10 | # Omer Caspi 11 | # 12 | -include $(CURDIR)/.config 13 | PROJ_NAME := zusd 14 | PROJ_OBJS := main.o signals.o 15 | PROJ_INCLUDES += . 16 | PROJ_LIBS := zus 17 | PROJ_LIB_DIRS := . 
PROJ_OBJS_DEPS := zusd.mk
PROJ_TARGET_DEPS := libzus.so

include common.zus.mk
--------------------------------------------------------------------------------
/zuslib.mk:
--------------------------------------------------------------------------------
# SPDX-License-Identifier: BSD-3-Clause
#
# Makefile for the zus
#
# Copyright (C) 2018 NetApp, Inc. All rights reserved.
#
# See module.c for LICENSE details.
#
# Authors:
#	Omer Caspi
#
-include $(CURDIR)/.config
PROJ_NAME := zus
PROJ_TARGET_TYPE := lib
PROJ_OBJS := zus-core.o zus-vfs.o module.o md_zus.o nvml_movnt.o utils.o fs-loader.o pa.o
PROJ_OBJS += printz.o slab.o
PROJ_INCLUDES := .
PROJ_LIBS := rt uuid unwind dl pthread systemd

# Always define CONFIG_TRY_ANON_MMAP for the C code, as 1 or 0
ifeq ($(CONFIG_TRY_ANON_MMAP),1)
PROJ_CDEFS += CONFIG_TRY_ANON_MMAP=1
else
PROJ_CDEFS += CONFIG_TRY_ANON_MMAP=0
endif

ifdef CONFIG_ZUF_DEF_PATH
PROJ_CDEFS += CONFIG_ZUF_DEF_PATH=\"$(CONFIG_ZUF_DEF_PATH)\"
endif

# Headers that are symlinked in from the kernel tree at $(ZUF_KERN_DIR)
ZUS_API_H := zus_api.h md_def.h md.h
LINUX_STAT_H := linux/stat.h
LINKED_HEADERS := $(ZUS_API_H) $(LINUX_STAT_H)

$(ZUS_API_H):
	@ln -sTfv $(shell realpath --relative-to=$(ZDIR) \
		$(ZUF_KERN_DIR))/fs/zuf/$@ $@

# '&&' so that a failed mkdir stops the recipe instead of running ln anyway
$(LINUX_STAT_H):
	@mkdir -p linux/ && \
	ln -sTfv $(shell realpath --relative-to=$(ZDIR)/linux \
		$(ZUF_KERN_DIR))/include/uapi/$(LINUX_STAT_H) $(LINUX_STAT_H)

PROJ_OBJS_DEPS := zuslib.mk $(LINKED_HEADERS)

# Not a file: must run even if something named clean_headers exists
.PHONY: clean_headers
clean_headers:
	rm -f $(LINKED_HEADERS)

PROJ_CLEAN_DEPS := clean_headers

include common.zus.mk
--------------------------------------------------------------------------------
/zusmodule.mk:
--------------------------------------------------------------------------------
include $(M)/Makefile
ZDIR ?=$(CURDIR)
MAKEFLAGS := --no-print-directory

# Translate the module's ZM_* settings to the PROJ_* names that
# common.zus.mk (included at the bottom) is fed with.
PROJ_NAME := $(ZM_NAME)
PROJ_OBJS := $(ZM_OBJS)
PROJ_CDEFS := $(ZM_CDEFS)
PROJ_WARNS := $(ZM_WARNS)
PROJ_INCLUDES := $(ZM_INCLUDES)
PROJ_LIBS := $(ZM_LIBS)
PROJ_LIB_DIRS := $(ZM_LIB_DIRS)
PROJ_CFLAGS := $(ZM_CFLAGS)
PROJ_LDFLAGS := $(ZM_LDFLAGS)
PROJ_OBJS_DEPS := $(M)/Makefile $(ZM_OBJS_DEPS) $(ZDIR)/zusmodule.mk
PROJ_TARGET_DEPS += $(ZM_TARGET_DEPS)
PROJ_LANG += $(ZM_LANG)

# ZM_TYPE can be one of the following:
# FS means a ZUS filesystem library
# ZUS_BIN means a binary that isn't a file system library but
#         still depends on libzus.
# ZM_TYPE==GENERIC means a binary that does not require libzus.
ifeq ($(ZM_TYPE),)
# default to ZUS_FS
ZM_TYPE := FS
endif
ZM_TYPE := $(strip $(ZM_TYPE))

ifeq ($(filter $(ZM_TYPE),FS ZUS_BIN GENERIC),)
$(error Unknown ZUS project type $(ZM_TYPE))
endif

ifeq ($(filter $(ZM_TYPE),FS ZUS_BIN), $(ZM_TYPE))
PROJ_INCLUDES += $(ZDIR)
PROJ_LIB_DIRS += $(ZDIR)
PROJ_LIBS += zus
PROJ_CFLAGS := -pthread $(PROJ_CFLAGS)
PROJ_LDFLAGS := -pthread $(PROJ_LDFLAGS)
PROJ_TARGET_DEPS += $(ZDIR)/libzus.so
ifeq ($(ZM_TYPE), FS)
PROJ_TARGET_TYPE := lib
PROJ_LDFLAGS := -Wl,-Tzus_ddbg.ld $(PROJ_LDFLAGS)
endif
else # Generic binary
PROJ_TARGET_TYPE := $(ZM_TARGET_TYPE)
endif

# The hook lists expand to "make t1 && make t2 && ... && true", so the
# first failing hook fails the recipe (an empty list leaves just "true").
module:
	@$(foreach t,$(ZM_PRE_BUILD),$(MAKE) -C $(M) $(t) &&) true
	@$(MAKE) M=$(M) -C .
	@$(foreach t,$(ZM_POST_BUILD),$(MAKE) -C $(M) $(t) &&) true

module_clean:
	@$(foreach t,$(ZM_PRE_CLEAN),$(MAKE) -C $(M) $(t) &&) true
	@$(MAKE) M=$(M) -C . __clean
	@$(foreach t,$(ZM_POST_CLEAN),$(MAKE) -C $(M) $(t) &&) true

.PHONY: module module_clean

include common.zus.mk
--------------------------------------------------------------------------------