├── .gitmodules
├── CMakeLists.txt
├── Makefile
├── README.md
├── README_TESTING.md
├── build_3fs.sh
├── extension_config.cmake
├── extension_version.txt
├── prepare_test_dirs.sh
├── src
│   ├── include
│   │   └── threefs.hpp
│   ├── threefs.cpp
│   └── threefs_extension.cpp
└── test
    ├── check_3fs_mount.sh
    └── sql
        ├── localfs_io.test
        ├── threefs.test
        ├── threefs_basic.test
        ├── threefs_concurrency.test
        ├── threefs_errors.test
        ├── threefs_integration.test
        ├── threefs_io.test
        └── threefs_performance.test

/.gitmodules:
--------------------------------------------------------------------------------
[submodule "duckdb"]
	path = duckdb
	url = https://github.com/duckdb/duckdb
	branch = main
[submodule "extension-ci-tools"]
	path = extension-ci-tools
	url = https://github.com/duckdb/extension-ci-tools
[submodule "3fs"]
	path = 3fs
	url = https://github.com/deepseek-ai/3FS.git

/CMakeLists.txt:
--------------------------------------------------------------------------------
# CMakeLists.txt for the ThreeFS extension

# Set minimum required version of CMake
cmake_minimum_required(VERSION 3.12)

# C++17 project-wide (set before any targets are created)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Extension name
set(TARGET_NAME threefs)

# Names of the two targets produced by the DuckDB extension scaffolding
set(EXTENSION_NAME ${TARGET_NAME}_extension)
set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)

project(${TARGET_NAME})

add_extension_definitions()

# Extension headers. (This directory was previously added twice — once as a
# bare relative path and once absolute; a single absolute path suffices.)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/include)

# 3FS build options
option(BUILD_3FS_LIB "Build 3FS library from source" ON)

if(BUILD_3FS_LIB)
  # In-tree 3FS submodule, built out-of-source into the binary dir
  set(3FS_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3fs")
  set(3FS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/3fs_build")

  file(MAKE_DIRECTORY ${3FS_BUILD_DIR})

  # Configure + build the 3FS static API library; the archive path is the
  # OUTPUT so downstream targets only rebuild it when missing/stale.
  add_custom_command(
    OUTPUT ${3FS_BUILD_DIR}/src/lib/api/libhf3fs_api.a
    COMMAND ${CMAKE_COMMAND}
            -DCMAKE_BUILD_TYPE=RelWithDebInfo
            -DCMAKE_CXX_COMPILER=clang++
            -DCMAKE_C_COMPILER=clang
            -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
            -DENABLE_FUSE_APPLICATION=OFF
            -DOVERRIDE_CXX_NEW_DELETE=OFF
            ${3FS_SOURCE_DIR}
    COMMAND ${CMAKE_COMMAND} --build . --parallel 8
    WORKING_DIRECTORY ${3FS_BUILD_DIR}
    COMMENT "Building 3FS library..."
    VERBATIM
  )

  add_custom_target(build_3fs DEPENDS ${3FS_BUILD_DIR}/src/lib/api/libhf3fs_api.a)

  # No FORCE here: a user-supplied -DDEEPSEEK_3FS_*_DIR on the command line
  # must win over the values derived from the in-tree build. (The previous
  # FORCE silently stomped user configuration on every reconfigure.)
  set(DEEPSEEK_3FS_INCLUDE_DIR "${3FS_SOURCE_DIR}/src" CACHE PATH "Path to deepseek 3fs include directory")
  set(DEEPSEEK_3FS_LIB_DIR "${3FS_BUILD_DIR}/src/lib/api" CACHE PATH "Path to deepseek 3fs library directory")
else()
  # Not building from source: default to a pre-built in-tree layout; both
  # values remain user-overridable cache entries.
  set(DEEPSEEK_3FS_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3fs/src" CACHE PATH "Path to deepseek 3fs include directory")
  set(DEEPSEEK_3FS_LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3fs/build/src/lib/api" CACHE PATH "Path to deepseek 3fs library directory")
endif()

# Include deepseek 3fs headers (quoted STREQUAL operands: unquoted expansion
# of an empty variable is a configure-time error)
if(NOT "${DEEPSEEK_3FS_INCLUDE_DIR}" STREQUAL "")
  include_directories(${DEEPSEEK_3FS_INCLUDE_DIR})
  # 3fs repository root is also needed on the include path
  include_directories(${CMAKE_CURRENT_SOURCE_DIR}/3fs)
endif()

if(NOT "${DEEPSEEK_3FS_LIB_DIR}" STREQUAL "")
  link_directories(${DEEPSEEK_3FS_LIB_DIR})

  # Pull in secondary (transitive) shared-library dependencies at link time
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--copy-dt-needed-entries")
endif()

# Extension sources
set(EXTENSION_SOURCES
  src/threefs_extension.cpp
  src/threefs.cpp
)

# Build static and loadable extensions (helpers provided by the DuckDB build)
build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})

set_target_properties(${EXTENSION_NAME} PROPERTIES
  EXPORT_NAME ${TARGET_NAME}
)

# Ensure the 3FS library is built before either extension target links
if(BUILD_3FS_LIB)
  add_dependencies(${EXTENSION_NAME} build_3fs)
  add_dependencies(${LOADABLE_EXTENSION_NAME} build_3fs)
endif()

if(NOT "${DEEPSEEK_3FS_LIB_DIR}" STREQUAL "")
  # Static extension links only the static API archive
  target_link_libraries(${EXTENSION_NAME} hf3fs_api)

  # Loadable extension links the shared API library so all symbols resolve
  target_link_libraries(${LOADABLE_EXTENSION_NAME} hf3fs_api_shared)
endif()

install(
  TARGETS ${EXTENSION_NAME}
  EXPORT "${DUCKDB_EXPORT_SET}"
  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")

/Makefile:
--------------------------------------------------------------------------------
# Set project directory
PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Extension name
EXT_NAME=threefs

# Extension configuration file
EXT_CONFIG=$(PROJ_DIR)extension_config.cmake

# Include DuckDB extension makefile
include $(PROJ_DIR)extension-ci-tools/makefiles/duckdb_extension.Makefile

/README.md:
-------------------------------------------------------------------------------- 1 | # DuckDB 3FS Extension 2 | 3 | The DuckDB 3FS extension enables DuckDB to seamlessly interact with the 3FS distributed file system, providing high-performance data access and manipulation capabilities directly from SQL. 4 | 5 | ## Overview 6 | 7 | The 3FS extension integrates DuckDB with the high-performance 3FS distributed file system, allowing users to: 8 | 9 | - Read and write data directly to and from 3FS using standard SQL 10 | - Perform SQL queries over data stored in 3FS without copying to local storage 11 | - Support for various file formats including CSV, Parquet, and JSON 12 | - Take advantage of 3FS's high-throughput I/O and USRBIO capabilities 13 | - Manage 3FS files and directories directly from SQL queries 14 | 15 | ## Features 16 | 17 | - **Direct Data Access**: Access data stored in 3FS using the `3fs://` URI prefix and /3fs/ path prefix 18 | - **File Operations**: Create, read, write, append, delete, copy, and move files 19 | - **Directory Operations**: Create, list, and delete directories 20 | - **Format Support**: Native support for multiple formats, including: 21 | - CSV 22 | - Parquet 23 | - JSON 24 | - **Advanced Features**: 25 | - Parallel reading and writing 26 | - Buffer size optimization 27 | - USRBIO support for enhanced performance 28 | - Integration with other DuckDB functionality (views, joins, etc.) 29 | 30 | ## Requirements 31 | 32 | - DuckDB (latest recommended) 33 | - 3FS installation with properly configured mountpoints 34 | - CMake 3.12 or later 35 | - C++17 compatible compiler 36 | 37 | ## Building the Extension 38 | 39 | ### Prerequisites 40 | 41 | Ensure you have the following prerequisites installed: 42 | - CMake (3.12 or newer) 43 | - C++17 compatible compiler (e.g., GCC 7+, Clang 5+) 44 | - DuckDB development environment 45 | - 3FS libraries and headers 46 | 47 | ### Building from Source 48 | 49 | 1. 
Clone the repository with submodules: 50 | ```bash 51 | git clone --recurse-submodules https://github.com/xxxx/duckdb-3fs.git 52 | cd duckdb-3fs 53 | ``` 54 | 55 | 2. Build 3FS libraries (if not using pre-built versions): 56 | ```bash 57 | ./build_3fs.sh 58 | ``` 59 | 60 | 3. Build the DuckDB 3FS extension: 61 | ```bash 62 | GEN=ninja make -j$(nproc) 63 | ``` 64 | 65 | This will compile both the statically linked extension and the loadable extension. 66 | 67 | 4. The built extension will be located at: 68 | ``` 69 | build/release/extension/threefs/threefs.duckdb_extension 70 | ``` 71 | 72 | ### Building with Custom 3FS Installation 73 | 74 | If you have 3FS installed in a custom location, modify `CMakeLists.txt` to point to your installation: 75 | 76 | ```cmake 77 | set(DEEPSEEK_3FS_INCLUDE_DIR "/path/to/3fs/include") 78 | set(DEEPSEEK_3FS_LIB_DIR "/path/to/3fs/lib") 79 | ``` 80 | 81 | Or pass the paths during the build: 82 | 83 | ```bash 84 | make DEEPSEEK_3FS_INCLUDE_DIR=/path/to/3fs/include DEEPSEEK_3FS_LIB_DIR=/path/to/3fs/lib 85 | ``` 86 | 87 | ### Building Python Duckdb 88 | 89 | If you want to use python duckdb with 3FS extension, you can build and install: 90 | ```bash 91 | cd /path/duckdb-3fs/ 92 | GEN=ninja make -j$(nproc) BUILD_PYTHON=1 CORE_EXTENSIONS="httpfs" 93 | cd /path/duckdb-3fs/duckdb/tools/pythonpkg 94 | python3 -m pip install . 
95 | ``` 96 | 97 | 98 | ## Using the Extension 99 | 100 | ### Loading the Extension 101 | 102 | Load the extension in DuckDB: 103 | 104 | ```sql 105 | -- Install the extension (if not installed already) 106 | INSTALL 'path/to/threefs.duckdb_extension'; 107 | 108 | -- Load the extension 109 | LOAD threefs; 110 | ``` 111 | 112 | ### Configuration 113 | 114 | Configure the extension with your 3FS cluster details: 115 | 116 | ```sql 117 | -- Set 3FS cluster name 118 | SET threefs_cluster='my_cluster'; 119 | 120 | -- Set 3FS mount root (if needed) 121 | SET threefs_mount_root='/path/to/3fs/mount'; 122 | 123 | -- Enable USRBIO mode for optimal performance (if supported) 124 | SET threefs_use_usrbio=true; 125 | 126 | -- Set custom buffer size (optional) 127 | SET threefs_iov_size=16384; 128 | ``` 129 | 130 | ### Basic Usage 131 | 132 | Query data directly from 3FS: 133 | 134 | ```sql 135 | -- Read CSV file from 3FS 136 | SELECT * FROM read_csv_auto('3fs://path/to/data.csv'); 137 | 138 | -- Read Parquet file from 3FS 139 | SELECT * FROM read_parquet('3fs://path/to/data.parquet'); 140 | 141 | -- Write query results to 3FS 142 | COPY (SELECT * FROM my_table) TO '3fs://path/to/output.parquet' (FORMAT PARQUET); 143 | ``` 144 | 145 | ## Using the Extension in Python 146 | ```python 147 | import duckdb 148 | con = duckdb.connect(config={'allow_unsigned_extensions': True}) 149 | con.install_extension('/root/duckdb-3fs/build/release/extension/threefs/threefs.duckdb_extension') 150 | con.load_extension('threefs') 151 | con.execute("SET threefs_cluster='open3fs';") 152 | con.execute("SET threefs_mount_root='/3fs/';") 153 | con.execute("SET threefs_use_usrbio=true;") 154 | con.execute("SET threefs_iov_size=16384;") 155 | con.execute("SET threefs_enable_debug_logging=true;") 156 | data = con.sql("SELECT * FROM read_parquet('/3fs/duckdb/prices.parquet')") 157 | print(data) 158 | con.sql("COPY (SELECT * FROM read_parquet('/3fs/duckdb/prices.parquet')) TO '/3fs/duckdb/output.parquet' 
(FORMAT PARQUET);") 159 | data = con.sql("SELECT * FROM read_parquet('3fs://3fs/duckdb/prices.parquet')") 160 | print(data) 161 | ``` 162 | 163 | 164 | ## Testing 165 | 166 | The extension includes comprehensive test suites covering different aspects of functionality: 167 | 168 | ### Running All Tests 169 | 170 | To run all tests: 171 | 172 | ```bash 173 | make test 174 | ``` 175 | 176 | ### Running Specific Test Groups 177 | 178 | To run only 3FS extension tests: 179 | 180 | ```bash 181 | make TEST_GROUP=threefs test 182 | ``` 183 | 184 | ### Running Individual Tests 185 | 186 | To run a specific test file: 187 | 188 | ```bash 189 | make TEST_FILE=test/sql/threefs_basic.test test 190 | ``` 191 | 192 | ### Manual Testing 193 | 194 | You can also run tests manually with the DuckDB CLI: 195 | 196 | ```bash 197 | build/release/duckdb -unsigned < test/sql/threefs_basic.test 198 | ``` 199 | 200 | ### Test Categories 201 | 202 | The test suite includes: 203 | 204 | 1. **Basic Tests** (`threefs_basic.test`): Core file and directory operations 205 | 2. **I/O Tests** (`threefs_io.test`): File reading and writing performance 206 | 3. **Concurrency Tests** (`threefs_concurrency.test`): Simulated concurrent access 207 | 4. **Error Handling** (`threefs_errors.test`): Error conditions and edge cases 208 | 5. **Performance Tests** (`threefs_performance.test`): Benchmark different operations 209 | 6. **Integration Tests** (`threefs_integration.test`): Integration with DuckDB features 210 | 211 | ### Creating Your Own Tests 212 | 213 | You can create additional tests by following the DuckDB test format. Test files should be placed in the `test/sql/` directory with a `.test` extension. 214 | 215 | ## Troubleshooting 216 | 217 | - If the extension fails to load with errors about missing symbols, ensure all dependencies are properly installed and linked. 218 | - For I/O performance issues, try adjusting the buffer size with `SET threefs_iov_size=`. 
219 | - If experiencing permission errors, check that your 3FS mount point has appropriate permissions. 220 | 221 | ## License 222 | 223 | This project is licensed under the [MIT License](LICENSE). 224 | -------------------------------------------------------------------------------- /README_TESTING.md: -------------------------------------------------------------------------------- 1 | # DuckDB 3FS Extension Testing Guide 2 | 3 | ## Prerequisites 4 | 5 | Before running the tests for the DuckDB 3FS extension, you need to ensure that all required directories exist. The tests expect certain directory structures to be in place. 6 | 7 | ## Preparing Test Environment 8 | 9 | To prepare your environment for testing, follow these steps: 10 | 11 | 1. Make the preparation script executable: 12 | ```bash 13 | chmod +x prepare_test_dirs.sh 14 | ``` 15 | 16 | 2. Run the script as a user with sufficient permissions (typically root): 17 | ```bash 18 | sudo ./prepare_test_dirs.sh 19 | ``` 20 | 21 | This script will create all necessary directories under `/3fs/test_threefs/` with appropriate permissions. 22 | 23 | 3. If you're running in a containerized environment, ensure that the container has access to the `/3fs` mount point. 
24 | 25 | ## Directory Structure 26 | 27 | The script creates the following directory structure: 28 | 29 | ``` 30 | /3fs/ 31 | └── test_threefs/ 32 | ├── integration_test/ 33 | │ ├── gender=M/ 34 | │ └── gender=F/ 35 | ├── basic_test/ 36 | │ ├── dir1/ 37 | │ └── dir2/ 38 | ├── io_test/ 39 | ├── concurrency_test/ 40 | ├── error_test/ 41 | │ └── special@#chars/ 42 | └── perf_test/ 43 | ``` 44 | 45 | ## Running the Tests 46 | 47 | After preparing the directories, you can run the tests using various methods: 48 | 49 | ### Running All Tests 50 | 51 | To run the entire test suite: 52 | 53 | ```bash 54 | # Using the make command 55 | make test 56 | 57 | # Using the unittest executable directly 58 | ./build/debug/test/unittest 59 | ``` 60 | 61 | ### Running Specific Test Groups 62 | 63 | To run only tests related to the 3FS extension: 64 | 65 | ```bash 66 | make TEST_GROUP=threefs test 67 | ``` 68 | 69 | ### Running Individual Test Files 70 | 71 | To run a specific test file: 72 | 73 | ```bash 74 | # Using the make command 75 | make TEST_FILE=test/sql/threefs_basic.test test 76 | 77 | # Using the unittest executable directly 78 | ./build/debug/test/unittest "/root/duckdb-3fs/test/sql/threefs_integration.test" 79 | ``` 80 | 81 | ### Manual Testing with DuckDB CLI 82 | 83 | You can also run tests manually with the DuckDB CLI: 84 | 85 | ```bash 86 | # Using the release build 87 | build/release/duckdb -unsigned < test/sql/threefs_basic.test 88 | 89 | # Using the debug build 90 | build/debug/duckdb -unsigned < test/sql/threefs_basic.test 91 | ``` 92 | 93 | ### Test Categories 94 | 95 | The DuckDB 3FS extension test suite includes several categories of tests: 96 | 97 | 1. **Basic Tests** (`threefs_basic.test`): Core file and directory operations 98 | 2. **I/O Tests** (`threefs_io.test`): File reading and writing performance 99 | 3. **Concurrency Tests** (`threefs_concurrency.test`): Simulated concurrent access 100 | 4. 
**Error Handling** (`threefs_errors.test`): Error conditions and edge cases 101 | 5. **Performance Tests** (`threefs_performance.test`): Benchmark different operations 102 | 6. **Integration Tests** (`threefs_integration.test`): Integration with DuckDB features 103 | 104 | ### Testing in Different Environments 105 | 106 | For CI/CD environments, you may want to run tests with different configurations: 107 | 108 | ```bash 109 | # Run tests with debug build 110 | DEBUG=1 make test 111 | 112 | # Run tests with release build and optimizations 113 | RELEASE=1 OPTIMIZE=1 make test 114 | ``` 115 | 116 | ## Troubleshooting 117 | 118 | If you encounter errors like: 119 | 120 | ``` 121 | No files found that match the pattern 122 | ``` 123 | 124 | It likely means that the required directories don't exist or have incorrect permissions. Re-run the `prepare_test_dirs.sh` script. 125 | 126 | For other issues, check the logs and ensure that the 3FS extension is properly configured with appropriate environment variables. 127 | 128 | ## Note 129 | 130 | The test framework doesn't support direct system commands (like `system mkdir`) within test files, which is why this separate preparation script is necessary. -------------------------------------------------------------------------------- /build_3fs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | cd 3fs 5 | # Update and initialize submodules 6 | echo "Updating git submodules..." 7 | git submodule update --init --recursive 8 | 9 | # Apply patches 10 | echo "Applying patches..." 11 | ./patches/apply.sh 12 | 13 | # Create build directory 14 | echo "Building 3fs..." 15 | cmake -S . -B build -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_EXPORT_COMPILE_COMMANDS=ON 16 | cmake --build build -j 8 17 | 18 | cd .. 19 | echo "Build completed successfully!" 
-------------------------------------------------------------------------------- /extension_config.cmake: -------------------------------------------------------------------------------- 1 | duckdb_extension_load(threefs 2 | DONT_LINK 3 | SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} 4 | INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/src/include 5 | LOAD_TESTS 6 | ) -------------------------------------------------------------------------------- /extension_version.txt: -------------------------------------------------------------------------------- 1 | v0.0.1 2 | -------------------------------------------------------------------------------- /prepare_test_dirs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script to create all directories required for DuckDB 3FS extension tests 4 | echo "Creating directories needed for tests..." 5 | 6 | # Ensure 3FS mount point exists 7 | mkdir -p /3fs 8 | 9 | # Create 3FS test directories 10 | mkdir -p /3fs/test_threefs/integration_test/gender=M 11 | mkdir -p /3fs/test_threefs/integration_test/gender=F 12 | mkdir -p /3fs/test_threefs/basic_test/dir1 13 | mkdir -p /3fs/test_threefs/basic_test/dir2 14 | mkdir -p /3fs/test_threefs/io_test 15 | mkdir -p /3fs/test_threefs/concurrency_test 16 | mkdir -p /3fs/test_threefs/error_test/special@#chars 17 | mkdir -p /3fs/test_threefs/error_test/very_long_directory_name_to_test_path_length_limits_in_3fs_filesystem_implementation 18 | mkdir -p /3fs/test_threefs/perf_test 19 | 20 | echo "All test directories have been created successfully!" 21 | echo "You can now run the tests." 
-------------------------------------------------------------------------------- /src/include/threefs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "duckdb/common/file_system.hpp" 6 | #include "duckdb/main/extension.hpp" 7 | #include "duckdb/storage/buffer_manager.hpp" 8 | 9 | namespace duckdb { 10 | 11 | void InitializeThreeFS(); 12 | void DeinitializeThreeFS(); 13 | 14 | struct ThreeFSParams { 15 | // 3FS cluster related parameters 16 | string cluster_name; // Cluster name 17 | string mount_root = "/"; // Mount point root directory 18 | bool enable_debug_logging = false; // Enable debug logging 19 | 20 | // USRBIO related parameters 21 | bool use_usrbio = true; // Whether to use USRBIO API 22 | size_t iov_size = 1024 * 1024; // Shared memory size (1MB) 23 | size_t ior_entries = 1024; // Maximum number of requests in IO ring 24 | //`0` for no control with I/O depth. 25 | // If greater than 0, then only when `io_depth` I/O requests are in queue, they will be issued to server as a batch. 26 | // If smaller than 0, then USRBIO will wait for at most `-io_depth` I/O requests are in queue and issue them in one batch. 27 | // If io_depth is 0, then USRBIO will issue all the prepared I/O requests to server ASAP. 
28 | size_t io_depth = 0; // IO batch processing depth 29 | int ior_timeout = 0; // IO timeout (milliseconds) 30 | 31 | // Read parameters from file opener 32 | static ThreeFSParams ReadFrom(optional_ptr opener); 33 | }; 34 | 35 | 36 | class ThreeFSFileSystem : public FileSystem { 37 | public: 38 | explicit ThreeFSFileSystem(BufferManager &) { 39 | // Initialize global default parameters 40 | params.cluster_name = ""; 41 | params.mount_root = "/"; 42 | params.use_usrbio = true; 43 | } 44 | 45 | // Verify that 3FS library is available 46 | static void Verify() { 47 | // Add verification logic here to ensure 3FS library is available 48 | // Throw an exception if there's a problem 49 | } 50 | 51 | unique_ptr OpenFile( 52 | const string &path, FileOpenFlags flags, 53 | optional_ptr opener = nullptr) override; 54 | 55 | //! Read exactly nr_bytes from the specified location in the file. Fails if 56 | //! nr_bytes could not be read. This is equivalent to calling 57 | //! SetFilePointer(location) followed by calling Read(). 58 | void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, 59 | idx_t location) override; 60 | //! Write exactly nr_bytes to the specified location in the file. Fails if 61 | //! nr_bytes could not be written. This is equivalent to calling 62 | //! SetFilePointer(location) followed by calling Write(). 63 | void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, 64 | idx_t location) override; 65 | //! Read nr_bytes from the specified file into the buffer, moving the file 66 | //! pointer forward by nr_bytes. Returns the amount of bytes read. 67 | int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes) override; 68 | //! Write nr_bytes from the buffer into the file, moving the file pointer 69 | //! forward by nr_bytes. 70 | int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes) override; 71 | //! Excise a range of the file. The file-system is free to deallocate this 72 | //! range (sparse file support). 
Reads to the range will succeed but will 73 | //! return undefined data. 74 | bool Trim(FileHandle &handle, idx_t offset_bytes, 75 | idx_t length_bytes) override; 76 | 77 | //! Returns the file size of a file handle, returns -1 on error 78 | int64_t GetFileSize(FileHandle &handle) override; 79 | //! Returns the file last modified time of a file handle, returns timespec 80 | //! with zero on all attributes on error 81 | time_t GetLastModifiedTime(FileHandle &handle) override; 82 | //! Returns the file last modified time of a file handle, returns timespec 83 | //! with zero on all attributes on error 84 | FileType GetFileType(FileHandle &handle) override; 85 | //! Truncate a file to a maximum size of new_size, new_size should be smaller 86 | //! than or equal to the current size of the file 87 | void Truncate(FileHandle &handle, int64_t new_size) override; 88 | 89 | //! Check if a directory exists 90 | bool DirectoryExists(const string &directory, 91 | optional_ptr opener = nullptr) override; 92 | //! Create a directory if it does not exist 93 | void CreateDirectory(const string &directory, 94 | optional_ptr opener = nullptr) override; 95 | //! Recursively remove a directory and all files in it 96 | void RemoveDirectory(const string &directory, 97 | optional_ptr opener = nullptr) override; 98 | //! List files in a directory, invoking the callback method for each one with 99 | //! (filename, is_dir) 100 | bool ListFiles(const string &directory, 101 | const std::function &callback, 102 | FileOpener *opener = nullptr) override; 103 | //! Move a file from source path to the target, StorageManager relies on this 104 | //! being an atomic action for ACID properties 105 | void MoveFile(const string &source, const string &target, 106 | optional_ptr opener = nullptr) override; 107 | //! Check if a file exists 108 | bool FileExists(const string &filename, 109 | optional_ptr opener = nullptr) override; 110 | 111 | //! 
Check if path is a pipe 112 | bool IsPipe(const string &filename, 113 | optional_ptr opener = nullptr) override; 114 | //! Remove a file from disk 115 | void RemoveFile(const string &filename, 116 | optional_ptr opener = nullptr) override; 117 | //! Sync a file handle to disk 118 | void FileSync(FileHandle &handle) override; 119 | 120 | //! Runs a glob on the file system, returning a list of matching files 121 | vector Glob(const string &path, 122 | FileOpener *opener = nullptr) override; 123 | 124 | bool CanHandleFile(const string &fpath) override; 125 | 126 | //! Set the file pointer of a file handle to a specified location. Reads and 127 | //! writes will happen from this location 128 | void Seek(FileHandle &handle, idx_t location) override; 129 | //! Return the current seek posiiton in the file. 130 | idx_t SeekPosition(FileHandle &handle) override; 131 | 132 | //! Whether or not we can seek into the file 133 | bool CanSeek() override; 134 | //! Whether or not the FS handles plain files on disk. This is relevant for 135 | //! certain optimizations, as random reads in a file on-disk are much cheaper 136 | //! than e.g. random reads in a file over the network 137 | bool OnDiskFile(FileHandle &handle) override; 138 | 139 | std::string GetName() const override { return "3fs"; } 140 | 141 | //! Returns the last Win32 error, in string format. Returns an empty string if 142 | //! there is no error, or on non-Windows systems. 143 | static std::string GetLastErrorAsString(); 144 | 145 | //! Checks a file is private (checks for 600 on linux/macos, TODO: currently 146 | //! always returns true on windows) 147 | static bool IsPrivateFile(const string &path_p, FileOpener *opener); 148 | 149 | // returns a C-string of the path that trims any file:/ prefix 150 | static const char *NormalizeLocalPath(const string &path); 151 | 152 | private: 153 | //! Set the file pointer of a file handle to a specified location. Reads and 154 | //! 
writes will happen from this location 155 | void SetFilePointer(FileHandle &handle, idx_t location); 156 | idx_t GetFilePointer(FileHandle &handle); 157 | 158 | vector FetchFileWithoutGlob(const string &path, FileOpener *opener, 159 | bool absolute_path); 160 | int64_t ReadImpl(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); 161 | 162 | ThreeFSParams params; 163 | }; 164 | 165 | // Define ThreeFSExtension class 166 | class ThreeFSExtension : public Extension { 167 | public: 168 | void Load(DuckDB &db) override; 169 | std::string Name() override; 170 | ~ThreeFSExtension() override; 171 | }; 172 | 173 | } // namespace duckdb -------------------------------------------------------------------------------- /src/threefs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include // For time() 8 | 9 | // Include 3fs headers 10 | #include "lib/api/hf3fs_usrbio.h" 11 | 12 | // macOS uses sys/mount.h instead of sys/vfs.h 13 | #if defined(__APPLE__) 14 | #include 15 | #define HAVE_SYS_MOUNT_H 16 | #endif 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "duckdb/common/checksum.hpp" 26 | #include "duckdb/common/exception.hpp" 27 | #include "duckdb/common/file_opener.hpp" 28 | #include "duckdb/common/helper.hpp" 29 | #include "duckdb/common/local_file_system.hpp" 30 | #include "duckdb/common/string_util.hpp" 31 | #include "duckdb/common/windows.hpp" 32 | #include "duckdb/logging/logger.hpp" 33 | #include "duckdb/main/client_context.hpp" 34 | #include "duckdb/main/database.hpp" 35 | 36 | // includes for giving a better error message on lock conflicts 37 | #if defined(__linux__) || defined(__APPLE__) 38 | #include 39 | #endif 40 | 41 | #if defined(__linux__) 42 | // See https://man7.org/linux/man-pages/man2/fallocate.2.html 43 | #ifndef _GNU_SOURCE 44 | #define _GNU_SOURCE /* See 
feature_test_macros(7) */ 45 | #endif 46 | #include 47 | #include 48 | // See e.g.: 49 | // https://opensource.apple.com/source/CarbonHeaders/CarbonHeaders-18.1/TargetConditionals.h.auto.html 50 | #elif defined(__APPLE__) 51 | #include 52 | #include 53 | #endif 54 | 55 | #include "include/threefs.hpp" 56 | #include "lib/api/hf3fs.h" 57 | 58 | // Define HAVE_SYS_MOUNT_H macro before including 3FS headers so that 3FS headers can properly handle macOS 59 | #if defined(__APPLE__) 60 | #ifndef HAVE_SYS_MOUNT_H 61 | #define HAVE_SYS_MOUNT_H 62 | #endif 63 | #endif 64 | 65 | // Simplified implementation of Glob function 66 | // Referenced from DuckDB's Glob function implementation 67 | // Placed in the global namespace so that it can be called from other places 68 | static bool Glob(const char *string, duckdb::idx_t slen, const char *pattern, duckdb::idx_t plen) { 69 | // Check special cases 70 | if (plen == 0) { 71 | return slen == 0; 72 | } 73 | 74 | if (pattern[0] == '*') { 75 | // Handle '*' wildcard 76 | pattern++, plen--; 77 | // Try to skip the current character and see if the rest matches 78 | for (duckdb::idx_t i = 0; i <= slen; i++) { 79 | if (Glob(string + i, slen - i, pattern, plen)) { 80 | return true; 81 | } 82 | } 83 | return false; 84 | } else { 85 | // Handle regular character matching 86 | if (slen == 0) { 87 | return false; 88 | } 89 | if (*string != *pattern) { 90 | return false; 91 | } 92 | return Glob(string + 1, slen - 1, pattern + 1, plen - 1); 93 | } 94 | } 95 | 96 | namespace duckdb { 97 | 98 | ThreeFSParams ThreeFSParams::ReadFrom(optional_ptr opener) 99 | { 100 | ThreeFSParams params; 101 | 102 | if (!opener) { 103 | return params; 104 | } 105 | 106 | // Set cluster name 107 | Value value; 108 | if (opener->TryGetCurrentSetting("threefs_cluster", value)) { 109 | params.cluster_name = value.ToString(); 110 | } 111 | 112 | // Set mount root directory 113 | if (opener->TryGetCurrentSetting("threefs_mount_root", value)) { 114 | params.mount_root = 
value.ToString(); 115 | } 116 | 117 | // Set enable debug logging 118 | if (opener->TryGetCurrentSetting("threefs_enable_debug_logging", value)) { 119 | params.enable_debug_logging = value.GetValue(); 120 | } 121 | 122 | // Set whether to use USRBIO 123 | if (opener->TryGetCurrentSetting("threefs_use_usrbio", value)) { 124 | params.use_usrbio = value.GetValue(); 125 | } 126 | 127 | // Set shared memory size 128 | if (opener->TryGetCurrentSetting("threefs_iov_size", value)) { 129 | params.iov_size = value.GetValue(); 130 | } 131 | 132 | // Set maximum number of IO ring requests 133 | if (opener->TryGetCurrentSetting("threefs_ior_entries", value)) { 134 | params.ior_entries = value.GetValue(); 135 | } 136 | 137 | // Set IO batch processing depth 138 | if (opener->TryGetCurrentSetting("threefs_io_depth", value)) { 139 | params.io_depth = value.GetValue(); 140 | } 141 | 142 | // Set IO timeout 143 | if (opener->TryGetCurrentSetting("threefs_ior_timeout", value)) { 144 | params.ior_timeout = value.GetValue(); 145 | } 146 | 147 | return params; 148 | } 149 | 150 | class USRBIOResourceManager { 151 | public: 152 | // Singleton pattern to get instance 153 | static USRBIOResourceManager *instance; 154 | 155 | USRBIOResourceManager() {} 156 | // Get current thread's USRBIO resources 157 | struct ThreadUSRBIOResource *GetThreadResource(const ThreeFSParams ¶ms); 158 | 159 | // Global resource cleanup 160 | ~USRBIOResourceManager(); 161 | 162 | private: 163 | USRBIOResourceManager(const USRBIOResourceManager &) = delete; 164 | USRBIOResourceManager &operator=(const USRBIOResourceManager &) = delete; 165 | 166 | // Thread resources map protection lock 167 | std::mutex resource_map_mutex; 168 | 169 | // ThreadID to resource mapping 170 | std::unordered_map 171 | thread_resources; 172 | }; 173 | 174 | // Thread level USRBIO resource structure 175 | struct ThreadUSRBIOResource { 176 | // USRBIO resources 177 | struct hf3fs_iov iov; 178 | struct hf3fs_ior ior_read; 179 | struct 
hf3fs_ior ior_write; 180 | 181 | // Resource initialization status 182 | bool initialized; 183 | 184 | // Resource belongs to parameters 185 | ThreeFSParams params; 186 | 187 | ThreadUSRBIOResource() : initialized(false) {} 188 | 189 | // Initialize resource 190 | bool Initialize(const ThreeFSParams ¶ms); 191 | 192 | // Cleanup resource 193 | void Cleanup(); 194 | 195 | ~ThreadUSRBIOResource() { Cleanup(); } 196 | }; 197 | 198 | // threefs.cpp 中添加 199 | bool ThreadUSRBIOResource::Initialize(const ThreeFSParams ¶ms) { 200 | if (initialized) { 201 | return true; 202 | } 203 | 204 | this->params = params; 205 | 206 | // Create shared memory 207 | int ret = 208 | hf3fs_iovcreate(&iov, params.mount_root.c_str(), params.iov_size, 0, -1); 209 | if (ret < 0) { 210 | return false; 211 | } 212 | 213 | // Create read I/O ring 214 | ret = 215 | hf3fs_iorcreate4(&ior_read, params.mount_root.c_str(), params.ior_entries, 216 | true, params.io_depth, params.ior_timeout, -1, 0); 217 | if (ret < 0) { 218 | hf3fs_iovdestroy(&iov); 219 | return false; 220 | } 221 | 222 | // Create write I/O ring 223 | ret = hf3fs_iorcreate4(&ior_write, params.mount_root.c_str(), 224 | params.ior_entries, false, params.io_depth, 225 | params.ior_timeout, -1, 0); 226 | if (ret < 0) { 227 | hf3fs_iordestroy(&ior_read); 228 | hf3fs_iovdestroy(&iov); 229 | return false; 230 | } 231 | 232 | initialized = true; 233 | return true; 234 | } 235 | 236 | void ThreadUSRBIOResource::Cleanup() { 237 | if (!initialized) { 238 | return; 239 | } 240 | 241 | // Destroy USRBIO resources 242 | hf3fs_iordestroy(&ior_write); 243 | hf3fs_iordestroy(&ior_read); 244 | hf3fs_iovdestroy(&iov); 245 | 246 | initialized = false; 247 | } 248 | 249 | // Resource manager implementation 250 | struct ThreadUSRBIOResource *USRBIOResourceManager::GetThreadResource( 251 | const ThreeFSParams ¶ms) { 252 | std::thread::id thread_id = std::this_thread::get_id(); 253 | 254 | { 255 | std::lock_guard lock(resource_map_mutex); 256 | 257 | // Find 
if current thread already has resources 258 | auto it = thread_resources.find(thread_id); 259 | if (it != thread_resources.end()) { 260 | return it->second; 261 | } 262 | 263 | // Create new thread resources 264 | ThreadUSRBIOResource *resource = new ThreadUSRBIOResource(); 265 | if (!resource->Initialize(params)) { 266 | delete resource; 267 | return nullptr; 268 | } 269 | 270 | // Store resource mapping 271 | thread_resources[thread_id] = resource; 272 | return resource; 273 | } 274 | } 275 | 276 | USRBIOResourceManager::~USRBIOResourceManager() { 277 | // Clean up all thread resources 278 | for (auto &pair : thread_resources) { 279 | delete pair.second; 280 | } 281 | thread_resources.clear(); 282 | } 283 | 284 | // 添加静态成员变量的定义 285 | USRBIOResourceManager* USRBIOResourceManager::instance = nullptr; 286 | 287 | struct ThreeFSFileHandle : public FileHandle { 288 | public: 289 | ThreeFSFileHandle(FileSystem &file_system, string &path, int fd, 290 | FileOpenFlags flags, ThreeFSParams params, bool append) 291 | : FileHandle(file_system, std::move(path), flags), 292 | fd(fd), 293 | params(params), 294 | is_append(append) {} 295 | ~ThreeFSFileHandle() override { ThreeFSFileHandle::Close(); } 296 | 297 | int fd; 298 | idx_t current_pos = 0; 299 | bool is_append = false; 300 | ThreeFSParams params; 301 | 302 | void Close() override { 303 | if (fd != -1) { 304 | if (params.enable_debug_logging) { 305 | fprintf(stderr, "Closing file handle: %s, fd: %d\n", path.c_str(), fd); 306 | } 307 | hf3fs_dereg_fd(fd); 308 | close(fd); 309 | fd = -1; 310 | } 311 | }; 312 | }; 313 | 314 | 315 | bool ThreeFSFileSystem::FileExists(const string &filename, 316 | optional_ptr opener) { 317 | if (!filename.empty()) { 318 | auto normalized_file = NormalizeLocalPath(filename); 319 | if (access(normalized_file, 0) == 0) { 320 | struct stat status; 321 | stat(normalized_file, &status); 322 | if (S_ISREG(status.st_mode)) { 323 | return true; 324 | } 325 | } 326 | } 327 | // if any condition fails 
328 | return false; 329 | } 330 | 331 | bool ThreeFSFileSystem::IsPipe(const string &filename, 332 | optional_ptr opener) { 333 | if (!filename.empty()) { 334 | auto normalized_file = NormalizeLocalPath(filename); 335 | if (access(normalized_file, 0) == 0) { 336 | struct stat status; 337 | stat(normalized_file, &status); 338 | if (S_ISFIFO(status.st_mode)) { 339 | return true; 340 | } 341 | } 342 | } 343 | // if any condition fails 344 | return false; 345 | } 346 | 347 | 348 | static FileType GetFileTypeInternal(int fd) { // LCOV_EXCL_START 349 | struct stat s; 350 | if (fstat(fd, &s) == -1) { 351 | return FileType::FILE_TYPE_INVALID; 352 | } 353 | switch (s.st_mode & S_IFMT) { 354 | case S_IFBLK: 355 | return FileType::FILE_TYPE_BLOCKDEV; 356 | case S_IFCHR: 357 | return FileType::FILE_TYPE_CHARDEV; 358 | case S_IFIFO: 359 | return FileType::FILE_TYPE_FIFO; 360 | case S_IFDIR: 361 | return FileType::FILE_TYPE_DIR; 362 | case S_IFLNK: 363 | return FileType::FILE_TYPE_LINK; 364 | case S_IFREG: 365 | return FileType::FILE_TYPE_REGULAR; 366 | case S_IFSOCK: 367 | return FileType::FILE_TYPE_SOCKET; 368 | default: 369 | return FileType::FILE_TYPE_INVALID; 370 | } 371 | } // LCOV_EXCL_STOP 372 | 373 | bool ThreeFSFileSystem::IsPrivateFile(const string &path_p, 374 | FileOpener *opener) { 375 | auto path = FileSystem::ExpandPath(path_p, opener); 376 | auto normalized_path = NormalizeLocalPath(path); 377 | 378 | struct stat st; 379 | 380 | if (lstat(normalized_path, &st) != 0) { 381 | throw IOException( 382 | "Failed to stat '%s' when checking file permissions, file may be " 383 | "missing or have incorrect permissions", 384 | path.c_str()); 385 | } 386 | 387 | // If group or other have any permission, the file is not private 388 | if (st.st_mode & 389 | (S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH)) { 390 | return false; 391 | } 392 | 393 | return true; 394 | } 395 | 396 | unique_ptr ThreeFSFileSystem::OpenFile( 397 | const string &path_p, FileOpenFlags 
flags, 398 | optional_ptr opener) { 399 | auto path = FileSystem::ExpandPath(path_p, opener); 400 | auto normalized_path = NormalizeLocalPath(path); 401 | if (flags.Compression() != FileCompressionType::UNCOMPRESSED) { 402 | throw NotImplementedException( 403 | "Unsupported compression type for default file system"); 404 | } 405 | 406 | flags.Verify(); 407 | 408 | int open_flags = 0; 409 | int rc; 410 | bool open_read = flags.OpenForReading(); 411 | bool open_write = flags.OpenForWriting(); 412 | if (open_read && open_write) { 413 | open_flags = O_RDWR; 414 | } else if (open_read) { 415 | open_flags = O_RDONLY; 416 | } else if (open_write) { 417 | open_flags = O_WRONLY; 418 | } else { 419 | throw InternalException( 420 | "READ, WRITE or both should be specified when opening a file"); 421 | } 422 | if (open_write) { 423 | // need Read or Write 424 | D_ASSERT(flags.OpenForWriting()); 425 | open_flags |= O_CLOEXEC; 426 | if (flags.CreateFileIfNotExists()) { 427 | open_flags |= O_CREAT; 428 | } else if (flags.OverwriteExistingFile()) { 429 | open_flags |= O_CREAT | O_TRUNC; 430 | } 431 | if (flags.OpenForAppending()) { 432 | open_flags |= O_APPEND; 433 | } 434 | } 435 | if (flags.DirectIO()) { 436 | #if defined(__DARWIN__) || defined(__APPLE__) || defined(__OpenBSD__) 437 | // OSX does not have O_DIRECT, instead we need to use fcntl afterwards to 438 | // support direct IO 439 | #else 440 | open_flags |= O_DIRECT; 441 | #endif 442 | } 443 | 444 | // Determine permissions 445 | mode_t filesec; 446 | if (flags.CreatePrivateFile()) { 447 | open_flags |= O_EXCL; // Ensure we error on existing files or the 448 | // permissions may not set 449 | filesec = 0600; 450 | } else { 451 | filesec = 0666; 452 | } 453 | 454 | if (flags.ExclusiveCreate()) { 455 | open_flags |= O_EXCL; 456 | } 457 | 458 | // Open the file 459 | int fd = open(normalized_path, open_flags, filesec); 460 | 461 | if (fd == -1) { 462 | if (flags.ReturnNullIfNotExists() && errno == ENOENT) { 463 | return 
nullptr; 464 | } 465 | if (flags.ReturnNullIfExists() && errno == EEXIST) { 466 | return nullptr; 467 | } 468 | throw IOException("Cannot open file \"%s\": %s", 469 | {{"errno", std::to_string(errno)}}, path, 470 | strerror(errno)); 471 | } 472 | 473 | #if defined(__DARWIN__) || defined(__APPLE__) 474 | if (flags.DirectIO()) { 475 | // OSX requires fcntl for Direct IO 476 | rc = fcntl(fd, F_NOCACHE, 1); 477 | if (rc == -1) { 478 | throw IOException("Could not enable direct IO for file \"%s\": %s", path, 479 | strerror(errno)); 480 | } 481 | } 482 | #endif 483 | 484 | if (flags.Lock() != FileLockType::NO_LOCK) { 485 | // set lock on file 486 | // but only if it is not an input/output stream 487 | auto file_type = GetFileTypeInternal(fd); 488 | if (file_type != FileType::FILE_TYPE_FIFO && 489 | file_type != FileType::FILE_TYPE_SOCKET) { 490 | struct flock fl; 491 | memset(&fl, 0, sizeof fl); 492 | fl.l_type = flags.Lock() == FileLockType::READ_LOCK ? F_RDLCK : F_WRLCK; 493 | fl.l_whence = SEEK_SET; 494 | fl.l_start = 0; 495 | fl.l_len = 0; 496 | rc = fcntl(fd, F_SETLK, &fl); 497 | // Retain the original error. 498 | int retained_errno = errno; 499 | bool has_error = rc == -1; 500 | string extended_error; 501 | if (has_error) { 502 | if (retained_errno == ENOTSUP) { 503 | // file lock not supported for this file system 504 | if (flags.Lock() == FileLockType::READ_LOCK) { 505 | // for read-only, we ignore not-supported errors 506 | has_error = false; 507 | errno = 0; 508 | } else { 509 | extended_error = 510 | "File locks are not supported for this file system, cannot " 511 | "open the file in " 512 | "read-write mode. 
Try opening the file in read-only mode"; 513 | } 514 | } 515 | } 516 | if (has_error) { 517 | if (extended_error.empty()) { 518 | // try to find out who is holding the lock using F_GETLK 519 | rc = fcntl(fd, F_GETLK, &fl); 520 | if (rc == -1) { // fnctl does not want to help us 521 | extended_error = strerror(errno); 522 | } 523 | if (flags.Lock() == FileLockType::WRITE_LOCK) { 524 | // maybe we can get a read lock instead and tell this to the user. 525 | fl.l_type = F_RDLCK; 526 | rc = fcntl(fd, F_SETLK, &fl); 527 | if (rc != -1) { // success! 528 | extended_error += 529 | ". However, you would be able to open this database in " 530 | "read-only mode, e.g. by " 531 | "using the -readonly parameter in the CLI"; 532 | } 533 | } 534 | } 535 | rc = close(fd); 536 | if (rc == -1) { 537 | extended_error += ". Also, failed closing file"; 538 | } 539 | extended_error += 540 | ". See also https://duckdb.org/docs/connect/concurrency"; 541 | throw IOException("Could not set lock on file \"%s\": %s", 542 | {{"errno", std::to_string(retained_errno)}}, path, 543 | extended_error); 544 | } 545 | } 546 | } 547 | 548 | // Register file descriptor 549 | rc = hf3fs_reg_fd(fd, 0); 550 | // hf3fs_reg_fd indicates if the file descriptor less than 0, it means the file descriptor 551 | // is registered 552 | if (rc > 0) { 553 | throw IOException("Failed to register file descriptor: " + std::to_string(fd), 554 | {{"errno", std::to_string(rc)}}); 555 | } 556 | 557 | ThreeFSParams params = ThreeFSParams::ReadFrom(opener); 558 | if (params.enable_debug_logging) { 559 | fprintf(stderr, "OpenFile: File handle: %s, fd: %d, is_append: %d\n", path.c_str(), fd, open_flags & O_APPEND); 560 | } 561 | return make_uniq(*this, path, fd, flags, params, open_flags & O_APPEND); 562 | } 563 | 564 | void ThreeFSFileSystem::SetFilePointer(FileHandle &handle, idx_t location) { 565 | int fd = handle.Cast().fd; 566 | off_t offset = lseek(fd, UnsafeNumericCast(location), SEEK_SET); 567 | if (offset == (off_t)-1) 
{ 568 | throw IOException("Could not seek to location %lu for file \"%s\": %s", 569 | {{"errno", std::to_string(errno)}}, location, handle.path, 570 | strerror(errno)); 571 | } 572 | } 573 | 574 | idx_t ThreeFSFileSystem::GetFilePointer(FileHandle &handle) { 575 | int fd = handle.Cast().fd; 576 | off_t position = lseek(fd, 0, SEEK_CUR); 577 | if (position == (off_t)-1) { 578 | throw IOException("Could not get file position file \"%s\": %s", 579 | {{"errno", std::to_string(errno)}}, handle.path, 580 | strerror(errno)); 581 | } 582 | return UnsafeNumericCast(position); 583 | } 584 | 585 | // ReadImpl is the internal implementation of Read, it's used to handle the case when the file is read from a specific location 586 | // This funcation won't modify the file descriptor position 587 | int64_t ThreeFSFileSystem::ReadImpl(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { 588 | auto &threefs_handle = handle.Cast(); 589 | // Get current thread's USRBIO resources 590 | ThreadUSRBIOResource *resource = 591 | USRBIOResourceManager::instance->GetThreadResource( 592 | threefs_handle.params); 593 | if (!resource || !resource->initialized) { 594 | throw IOException("Read: Failed to initialize USRBIO for thread"); 595 | } 596 | 597 | uint8_t *buf_ptr = static_cast(buffer); 598 | int64_t bytes_remaining = nr_bytes; 599 | idx_t current_offset = location; 600 | if (threefs_handle.params.enable_debug_logging) { 601 | fprintf(stderr, "Read: File handle: %s, location: %lu, nr_bytes: %ld\n", threefs_handle.path.c_str(), current_offset, nr_bytes); 602 | } 603 | 604 | // Block processing large data 605 | while (bytes_remaining > 0) { 606 | // Determine current block size 607 | size_t current_chunk_size = 608 | std::min(bytes_remaining, resource->params.iov_size); 609 | 610 | // Prepare I/O request 611 | if (threefs_handle.params.enable_debug_logging) { 612 | fprintf(stderr, "Prepare read I/O request for file %s, location: %lu, nr_bytes: %ld\n", 
threefs_handle.path.c_str(), current_offset, current_chunk_size); 613 | } 614 | int ret = 615 | hf3fs_prep_io(&resource->ior_read, &resource->iov, 616 | true, // Read operation 617 | resource->iov.base, // Use shared memory 618 | threefs_handle.fd, current_offset, current_chunk_size, 619 | nullptr // User data 620 | ); 621 | 622 | if (ret < 0) { 623 | throw IOException("Failed to prepare read I/O: " + std::to_string(-ret)); 624 | } 625 | 626 | // Submit I/O request 627 | ret = hf3fs_submit_ios(&resource->ior_read); 628 | if (ret < 0) { 629 | throw IOException("Failed to submit read I/O: " + std::to_string(-ret)); 630 | } 631 | 632 | // Wait for I/O to complete 633 | struct hf3fs_cqe cqes[1]; 634 | ret = hf3fs_wait_for_ios(&resource->ior_read, cqes, 1, 1, nullptr); 635 | if (ret < 0) { 636 | throw IOException("Failed to wait for read I/O: " + std::to_string(-ret)); 637 | } 638 | 639 | // Check completion status 640 | if (cqes[0].result < 0) { 641 | throw IOException("Read I/O failed: " + std::to_string(-cqes[0].result)); 642 | } 643 | 644 | // Get actual read bytes 645 | size_t bytes_read = cqes[0].result; 646 | if (bytes_read == 0 && bytes_remaining > 0) { 647 | // Reached end of file 648 | break; 649 | } 650 | 651 | // Copy data from shared memory to user buffer 652 | memcpy(buf_ptr, resource->iov.base, bytes_read); 653 | 654 | // Update pointer and count 655 | buf_ptr += bytes_read; 656 | bytes_remaining -= bytes_read; 657 | current_offset += bytes_read; 658 | 659 | // If read bytes are less than requested bytes, it means end of file 660 | if (bytes_read < current_chunk_size) { 661 | break; 662 | } 663 | } 664 | 665 | if (threefs_handle.params.enable_debug_logging) { 666 | fprintf(stderr, "Successfully read %ld bytes from offset %lu of file %s\n", nr_bytes - bytes_remaining, location, threefs_handle.path.c_str()); 667 | } 668 | return nr_bytes - bytes_remaining; 669 | } 670 | 671 | int64_t ThreeFSFileSystem::Read(FileHandle &handle, void *buffer, int64_t 
nr_bytes) { 672 | auto &threefs_handle = handle.Cast(); 673 | idx_t location = threefs_handle.current_pos; 674 | try { 675 | auto bytes_read = ReadImpl(threefs_handle, buffer, nr_bytes, location); 676 | threefs_handle.current_pos += bytes_read; 677 | return bytes_read; 678 | } catch (const IOException &e) { 679 | throw; 680 | } 681 | } 682 | 683 | void ThreeFSFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { 684 | auto &threefs_handle = handle.Cast(); 685 | 686 | try { 687 | ReadImpl(threefs_handle, buffer, nr_bytes, location); 688 | } catch (const IOException &e) { 689 | throw; 690 | } 691 | } 692 | 693 | 694 | // This funcation won't modify the file descriptor position 695 | void ThreeFSFileSystem::Write(FileHandle &handle, void *buffer, 696 | int64_t nr_bytes, idx_t location) { 697 | auto &threefs_handle = handle.Cast(); 698 | 699 | // Get current thread's USRBIO resources 700 | ThreadUSRBIOResource *resource = 701 | USRBIOResourceManager::instance->GetThreadResource( 702 | threefs_handle.params); 703 | if (!resource || !resource->initialized) { 704 | throw IOException("Write: Failed to initialize USRBIO for thread"); 705 | } 706 | 707 | const uint8_t *buf_ptr = static_cast(buffer); 708 | int64_t bytes_remaining = nr_bytes; 709 | idx_t current_offset = location; 710 | if (threefs_handle.is_append) { 711 | current_offset = GetFileSize(handle); 712 | } 713 | 714 | if (threefs_handle.params.enable_debug_logging) { 715 | fprintf(stderr, "Write: File handle: %s, location: %lu, nr_bytes: %ld\n", threefs_handle.path.c_str(), current_offset, nr_bytes); 716 | } 717 | 718 | // Block processing large data 719 | while (bytes_remaining > 0) { 720 | // Determine current block size 721 | size_t current_chunk_size = 722 | std::min(bytes_remaining, resource->params.iov_size); 723 | 724 | // Copy data to shared memory 725 | memcpy(resource->iov.base, buf_ptr, current_chunk_size); 726 | 727 | // Prepare I/O request 728 | if 
(threefs_handle.params.enable_debug_logging) { 729 | fprintf(stderr, "Prepare write I/O request for file %s, location: %lu, nr_bytes: %ld\n", threefs_handle.path.c_str(), current_offset, current_chunk_size); 730 | } 731 | 732 | int ret = 733 | hf3fs_prep_io(&resource->ior_write, &resource->iov, 734 | false, // Write operation 735 | resource->iov.base, // Use shared memory 736 | threefs_handle.fd, current_offset, current_chunk_size, 737 | nullptr // User data 738 | ); 739 | 740 | if (ret < 0) { 741 | throw IOException("Failed to prepare write I/O: " + std::to_string(-ret)); 742 | } 743 | 744 | // Submit I/O request 745 | ret = hf3fs_submit_ios(&resource->ior_write); 746 | 747 | if (ret < 0) { 748 | throw IOException("Failed to submit write I/O: " + std::to_string(-ret)); 749 | } 750 | 751 | // Wait for I/O to complete 752 | struct hf3fs_cqe cqes[1]; 753 | ret = hf3fs_wait_for_ios(&resource->ior_write, cqes, 1, 1, nullptr); 754 | if (ret < 0) { 755 | throw IOException("Failed to wait for write I/O: " + 756 | std::to_string(-ret)); 757 | } 758 | 759 | // Check completion status 760 | if (cqes[0].result < 0) { 761 | throw IOException("Write I/O failed: " + std::to_string(-cqes[0].result)); 762 | } 763 | 764 | // Get actual written bytes 765 | size_t bytes_written = cqes[0].result; 766 | if (bytes_written != current_chunk_size) { 767 | throw IOException("Could not write all bytes to file \"" + 768 | threefs_handle.path + "\": wrote " + 769 | std::to_string(bytes_written) + "/" + 770 | std::to_string(current_chunk_size) + " bytes"); 771 | } 772 | 773 | // Update pointer and count 774 | buf_ptr += bytes_written; 775 | bytes_remaining -= bytes_written; 776 | current_offset += bytes_written; 777 | } 778 | 779 | if (threefs_handle.params.enable_debug_logging) { 780 | fprintf(stderr, "Successfully written %ld bytes to offset %lu of file %s\n", nr_bytes - bytes_remaining, location, threefs_handle.path.c_str()); 781 | } 782 | } 783 | 784 | int64_t 
ThreeFSFileSystem::Write(FileHandle &handle, void *buffer, 785 | int64_t nr_bytes) { 786 | auto &threefs_handle = handle.Cast(); 787 | 788 | Write(handle, buffer, nr_bytes, threefs_handle.current_pos); 789 | // Follow posix standard, if the file is opened for appending, the file pointer won't be updated 790 | if (!threefs_handle.is_append) { 791 | threefs_handle.current_pos += nr_bytes; 792 | } 793 | 794 | // Return actual written bytes 795 | return nr_bytes; 796 | } 797 | 798 | bool ThreeFSFileSystem::Trim(FileHandle &handle, idx_t offset_bytes, 799 | idx_t length_bytes) { 800 | #if defined(__linux__) 801 | // FALLOC_FL_PUNCH_HOLE requires glibc 2.18 or up 802 | #if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 18) 803 | return false; 804 | #else 805 | int fd = handle.Cast().fd; 806 | int res = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 807 | UnsafeNumericCast(offset_bytes), 808 | UnsafeNumericCast(length_bytes)); 809 | return res == 0; 810 | #endif 811 | #else 812 | return false; 813 | #endif 814 | } 815 | 816 | int64_t ThreeFSFileSystem::GetFileSize(FileHandle &handle) { 817 | int fd = handle.Cast().fd; 818 | struct stat s; 819 | if (fstat(fd, &s) == -1) { 820 | throw IOException("Failed to get file size for file \"%s\": %s", 821 | {{"errno", std::to_string(errno)}}, handle.path, 822 | strerror(errno)); 823 | } 824 | return s.st_size; 825 | } 826 | 827 | time_t ThreeFSFileSystem::GetLastModifiedTime(FileHandle &handle) { 828 | int fd = handle.Cast().fd; 829 | struct stat s; 830 | if (fstat(fd, &s) == -1) { 831 | throw IOException("Failed to get last modified time for file \"%s\": %s", 832 | {{"errno", std::to_string(errno)}}, handle.path, 833 | strerror(errno)); 834 | } 835 | return s.st_mtime; 836 | } 837 | 838 | FileType ThreeFSFileSystem::GetFileType(FileHandle &handle) { 839 | int fd = handle.Cast().fd; 840 | return GetFileTypeInternal(fd); 841 | } 842 | 843 | void ThreeFSFileSystem::Truncate(FileHandle &handle, int64_t new_size) { 844 
| auto &threefs_handle = handle.Cast(); 845 | int fd = threefs_handle.fd; 846 | if (threefs_handle.params.enable_debug_logging) { 847 | fprintf(stderr, "Truncate: File handle: %s, new_size: %ld\n", handle.path.c_str(), new_size); 848 | } 849 | if (ftruncate(fd, new_size) != 0) { 850 | throw IOException("Could not truncate file \"%s\": %s", 851 | {{"errno", std::to_string(errno)}}, handle.path, 852 | strerror(errno)); 853 | } 854 | } 855 | 856 | bool ThreeFSFileSystem::DirectoryExists(const string &directory, 857 | optional_ptr opener) { 858 | ThreeFSParams params = ThreeFSParams::ReadFrom(opener); 859 | if (params.enable_debug_logging) { 860 | fprintf(stderr, "DirectoryExists: directory: %s\n", directory.c_str()); 861 | } 862 | if (!directory.empty()) { 863 | auto normalized_dir = NormalizeLocalPath(directory); 864 | if (access(normalized_dir, 0) == 0) { 865 | struct stat status; 866 | stat(normalized_dir, &status); 867 | if (status.st_mode & S_IFDIR) { 868 | return true; 869 | } 870 | } 871 | } 872 | // if any condition fails 873 | return false; 874 | } 875 | 876 | void ThreeFSFileSystem::CreateDirectory(const string &directory, 877 | optional_ptr opener) { 878 | struct stat st; 879 | ThreeFSParams params = ThreeFSParams::ReadFrom(opener); 880 | if (params.enable_debug_logging) { 881 | fprintf(stderr, "CreateDirectory: directory: %s\n", directory.c_str()); 882 | } 883 | auto normalized_dir = NormalizeLocalPath(directory); 884 | if (stat(normalized_dir, &st) != 0) { 885 | /* Directory does not exist. 
EEXIST for race condition */ 886 | if (mkdir(normalized_dir, 0755) != 0 && errno != EEXIST) { 887 | throw IOException("Failed to create directory \"%s\": %s", 888 | {{"errno", std::to_string(errno)}}, directory, 889 | strerror(errno)); 890 | } 891 | } else if (!S_ISDIR(st.st_mode)) { 892 | throw IOException( 893 | "Failed to create directory \"%s\": path exists but is not a " 894 | "directory!", 895 | {{"errno", std::to_string(errno)}}, directory); 896 | } 897 | } 898 | 899 | int RemoveDirectoryFastOrRecursive(const char *path, 900 | optional_ptr opener) { 901 | ThreeFSParams params = ThreeFSParams::ReadFrom(opener); 902 | if (params.enable_debug_logging) { 903 | fprintf(stderr, "RemoveDirectoryFastOrRecursive: path: %s\n", path); 904 | } 905 | // Check if the path is on a 3fs filesystem 906 | char hf3fs_mount_point[256]; 907 | 908 | // Try to extract the mount point 909 | int extract_result = hf3fs_extract_mount_point(hf3fs_mount_point, sizeof(hf3fs_mount_point), path); 910 | if (extract_result > 0) { 911 | // Path is on 3fs filesystem, use the efficient deletion method 912 | try { 913 | // Create a symlink in the rm-rf directory with a timestamped name 914 | char timestamp[32]; 915 | snprintf(timestamp, sizeof(timestamp), "%ld", static_cast(time(nullptr))); 916 | 917 | // Create the link path: /3fs-virt/rm-rf/- 918 | std::string basename = std::string(path).substr(std::string(path).find_last_of("/\\") + 1); 919 | std::string link_path = std::string(hf3fs_mount_point) + "/3fs-virt/rm-rf/" + basename + "-" + timestamp; 920 | 921 | // Create the symlink 922 | if (symlink(path, link_path.c_str()) == 0) { 923 | return 0; // Successfully created symlink for background deletion 924 | } 925 | } catch (const std::exception &e) { 926 | // We'll fall back to regular recursive deletion on any failure 927 | } 928 | } 929 | 930 | // If not on 3fs or the 3fs method failed, use regular recursive deletion 931 | DIR *d = opendir(path); 932 | idx_t path_len = (idx_t)strlen(path); 
933 | int r = -1; 934 | 935 | if (d) { 936 | struct dirent *p; 937 | r = 0; 938 | while (!r && (p = readdir(d))) { 939 | int r2 = -1; 940 | char *buf; 941 | idx_t len; 942 | /* Skip the names "." and ".." as we don't want to recurse on them. */ 943 | if (!strcmp(p->d_name, ".") || !strcmp(p->d_name, "..")) { 944 | continue; 945 | } 946 | len = path_len + (idx_t)strlen(p->d_name) + 2; 947 | buf = new (std::nothrow) char[len]; 948 | if (buf) { 949 | struct stat statbuf; 950 | snprintf(buf, len, "%s/%s", path, p->d_name); 951 | if (!stat(buf, &statbuf)) { 952 | if (S_ISDIR(statbuf.st_mode)) { 953 | r2 = RemoveDirectoryFastOrRecursive(buf, opener); 954 | } else { 955 | r2 = unlink(buf); 956 | } 957 | } 958 | delete[] buf; 959 | } 960 | r = r2; 961 | } 962 | closedir(d); 963 | } 964 | if (!r) { 965 | r = rmdir(path); 966 | } 967 | return r; 968 | } 969 | 970 | void ThreeFSFileSystem::RemoveDirectory(const string &directory, 971 | optional_ptr opener) { 972 | auto normalized_dir = NormalizeLocalPath(directory); 973 | RemoveDirectoryFastOrRecursive(normalized_dir, opener); 974 | } 975 | 976 | void ThreeFSFileSystem::RemoveFile(const string &filename, 977 | optional_ptr opener) { 978 | ThreeFSParams params = ThreeFSParams::ReadFrom(opener); 979 | if (params.enable_debug_logging) { 980 | fprintf(stderr, "RemoveFile: filename: %s\n", filename.c_str()); 981 | } 982 | auto normalized_file = NormalizeLocalPath(filename); 983 | if (std::remove(normalized_file) != 0) { 984 | throw IOException("Could not remove file \"%s\": %s", 985 | {{"errno", std::to_string(errno)}}, filename, 986 | strerror(errno)); 987 | } 988 | } 989 | 990 | bool ThreeFSFileSystem::ListFiles( 991 | const string &directory, 992 | const std::function &callback, 993 | FileOpener *opener) { 994 | ThreeFSParams params = ThreeFSParams::ReadFrom(opener); 995 | if (params.enable_debug_logging) { 996 | fprintf(stderr, "ListFiles: directory: %s\n", directory.c_str()); 997 | } 998 | auto normalized_dir = 
NormalizeLocalPath(directory); 999 | auto dir = opendir(normalized_dir); 1000 | if (!dir) { 1001 | return false; 1002 | } 1003 | 1004 | // RAII wrapper around DIR to automatically free on exceptions in callback 1005 | std::unique_ptr> dir_unique_ptr( 1006 | dir, [](DIR *d) { closedir(d); }); 1007 | 1008 | struct dirent *ent; 1009 | // loop over all files in the directory 1010 | while ((ent = readdir(dir)) != nullptr) { 1011 | string name = string(ent->d_name); 1012 | // skip . .. and empty files 1013 | if (name.empty() || name == "." || name == "..") { 1014 | continue; 1015 | } 1016 | // now stat the file to figure out if it is a regular file or directory 1017 | string full_path = JoinPath(normalized_dir, name); 1018 | struct stat status; 1019 | auto res = stat(full_path.c_str(), &status); 1020 | if (res != 0) { 1021 | continue; 1022 | } 1023 | if (!(status.st_mode & S_IFREG) && !(status.st_mode & S_IFDIR)) { 1024 | // not a file or directory: skip 1025 | continue; 1026 | } 1027 | // invoke callback 1028 | callback(name, status.st_mode & S_IFDIR); 1029 | } 1030 | 1031 | return true; 1032 | } 1033 | 1034 | void ThreeFSFileSystem::FileSync(FileHandle &handle) { 1035 | auto &threefs_handle = handle.Cast(); 1036 | int fd = threefs_handle.fd; 1037 | if (threefs_handle.params.enable_debug_logging) { 1038 | fprintf(stderr, "FileSync: File handle: %s, fd: %d\n", handle.path.c_str(), fd); 1039 | } 1040 | if (fsync(fd) != 0) { 1041 | throw FatalException("fsync failed!"); 1042 | } 1043 | } 1044 | 1045 | void ThreeFSFileSystem::MoveFile(const string &source, const string &target, 1046 | optional_ptr opener) { 1047 | auto normalized_source = NormalizeLocalPath(source); 1048 | auto normalized_target = NormalizeLocalPath(target); 1049 | ThreeFSParams params = ThreeFSParams::ReadFrom(opener); 1050 | if (params.enable_debug_logging) { 1051 | fprintf(stderr, "MoveFile: source: %s, target: %s\n", normalized_source, normalized_target); 1052 | } 1053 | //! 
FIXME: rename does not guarantee atomicity or overwriting target file if 1054 | //! it exists 1055 | if (rename(normalized_source, normalized_target) != 0) { 1056 | throw IOException("Could not rename file!", 1057 | {{"errno", std::to_string(errno)}}); 1058 | } 1059 | } 1060 | 1061 | std::string ThreeFSFileSystem::GetLastErrorAsString() { return string(); } 1062 | 1063 | bool ThreeFSFileSystem::CanSeek() { return true; } 1064 | 1065 | bool ThreeFSFileSystem::OnDiskFile(FileHandle &handle) { return true; } 1066 | 1067 | void ThreeFSFileSystem::Seek(FileHandle &handle, idx_t location) { 1068 | if (!CanSeek()) { 1069 | throw IOException("Cannot seek in files of this type"); 1070 | } 1071 | SetFilePointer(handle, location); 1072 | } 1073 | 1074 | idx_t ThreeFSFileSystem::SeekPosition(FileHandle &handle) { 1075 | if (!CanSeek()) { 1076 | throw IOException("Cannot seek in files of this type"); 1077 | } 1078 | return GetFilePointer(handle); 1079 | } 1080 | 1081 | static bool IsCrawl(const string &glob) { 1082 | // glob must match exactly 1083 | return glob == "**"; 1084 | } 1085 | static bool HasMultipleCrawl(const vector &splits) { 1086 | return std::count(splits.begin(), splits.end(), "**") > 1; 1087 | } 1088 | static bool IsSymbolicLink(const string &path) { 1089 | auto normalized_path = ThreeFSFileSystem::NormalizeLocalPath(path); 1090 | struct stat status; 1091 | return (lstat(normalized_path, &status) != -1 && S_ISLNK(status.st_mode)); 1092 | } 1093 | 1094 | static void RecursiveGlobDirectories(FileSystem &fs, const string &path, 1095 | vector &result, 1096 | bool match_directory, bool join_path) { 1097 | fs.ListFiles(path, [&](const string &fname, bool is_directory) { 1098 | string concat; 1099 | if (join_path) { 1100 | concat = fs.JoinPath(path, fname); 1101 | } else { 1102 | concat = fname; 1103 | } 1104 | if (IsSymbolicLink(concat)) { 1105 | return; 1106 | } 1107 | if (is_directory == match_directory) { 1108 | result.push_back(concat); 1109 | } 1110 | if 
(is_directory) { 1111 | RecursiveGlobDirectories(fs, concat, result, match_directory, true); 1112 | } 1113 | }); 1114 | } 1115 | 1116 | static void GlobFilesInternal(FileSystem &fs, const string &path, 1117 | const string &glob, bool match_directory, 1118 | vector &result, bool join_path) { 1119 | fs.ListFiles(path, [&](const string &fname, bool is_directory) { 1120 | if (is_directory != match_directory) { 1121 | return; 1122 | } 1123 | if (Glob(fname.c_str(), fname.size(), glob.c_str(), glob.size())) { 1124 | if (join_path) { 1125 | result.push_back(fs.JoinPath(path, fname)); 1126 | } else { 1127 | result.push_back(fname); 1128 | } 1129 | } 1130 | }); 1131 | } 1132 | 1133 | vector ThreeFSFileSystem::FetchFileWithoutGlob(const string &path, 1134 | FileOpener *opener, 1135 | bool absolute_path) { 1136 | vector result; 1137 | if (FileExists(path, opener) || IsPipe(path, opener)) { 1138 | result.push_back(path); 1139 | } else if (!absolute_path) { 1140 | Value value; 1141 | if (opener && opener->TryGetCurrentSetting("file_search_path", value)) { 1142 | auto search_paths_str = value.ToString(); 1143 | vector search_paths = 1144 | StringUtil::Split(search_paths_str, ','); 1145 | for (const auto &search_path : search_paths) { 1146 | auto joined_path = JoinPath(search_path, path); 1147 | if (FileExists(joined_path, opener) || IsPipe(joined_path, opener)) { 1148 | result.push_back(joined_path); 1149 | } 1150 | } 1151 | } 1152 | } 1153 | return result; 1154 | } 1155 | 1156 | // Helper function to handle 3fs:/ URLs 1157 | static idx_t GetFileUrlOffset(const string &path) { 1158 | if (!StringUtil::StartsWith(path, "3fs:/")) { 1159 | return 0; 1160 | } 1161 | 1162 | // Url without host: 3fs:/some/path 1163 | if (path[6] != '/') { 1164 | return 5; 1165 | } 1166 | 1167 | // Url with empty host: 3fs:///some/path 1168 | if (path[7] == '/') { 1169 | return 7; 1170 | } 1171 | 1172 | // Url with localhost: 3fs://localhost/some/path 1173 | if (path.compare(7, 10, "localhost/") == 0) { 
1174 | return 16; 1175 | } 1176 | 1177 | // unkown 3fs:/ url format 1178 | return 0; 1179 | } 1180 | 1181 | const char *ThreeFSFileSystem::NormalizeLocalPath(const string &path) { 1182 | return path.c_str() + GetFileUrlOffset(path); 1183 | } 1184 | 1185 | vector ThreeFSFileSystem::Glob(const string &path, FileOpener *opener) { 1186 | if (path.empty()) { 1187 | return vector(); 1188 | } 1189 | // split up the path into separate chunks 1190 | vector splits; 1191 | 1192 | bool is_file_url = StringUtil::StartsWith(path, "file:/"); 1193 | idx_t file_url_path_offset = GetFileUrlOffset(path); 1194 | 1195 | idx_t last_pos = 0; 1196 | for (idx_t i = file_url_path_offset; i < path.size(); i++) { 1197 | if (path[i] == '\\' || path[i] == '/') { 1198 | if (i == last_pos) { 1199 | // empty: skip this position 1200 | last_pos = i + 1; 1201 | continue; 1202 | } 1203 | if (splits.empty()) { 1204 | // splits.push_back(path.substr(file_url_path_offset, 1205 | // i-file_url_path_offset)); 1206 | splits.push_back(path.substr(0, i)); 1207 | } else { 1208 | splits.push_back(path.substr(last_pos, i - last_pos)); 1209 | } 1210 | last_pos = i + 1; 1211 | } 1212 | } 1213 | splits.push_back(path.substr(last_pos, path.size() - last_pos)); 1214 | // handle absolute paths 1215 | bool absolute_path = false; 1216 | if (IsPathAbsolute(path)) { 1217 | // first character is a slash - unix absolute path 1218 | absolute_path = true; 1219 | } else if (StringUtil::Contains(splits[0], 1220 | ":")) { // TODO: this is weird? shouldn't 1221 | // IsPathAbsolute handle this? 
1222 | // first split has a colon - windows absolute path 1223 | absolute_path = true; 1224 | } else if (splits[0] == "~") { 1225 | // starts with home directory 1226 | auto home_directory = GetHomeDirectory(opener); 1227 | if (!home_directory.empty()) { 1228 | absolute_path = true; 1229 | splits[0] = home_directory; 1230 | D_ASSERT(path[0] == '~'); 1231 | if (!HasGlob(path)) { 1232 | return Glob(home_directory + path.substr(1)); 1233 | } 1234 | } 1235 | } 1236 | // Check if the path has a glob at all 1237 | if (!HasGlob(path)) { 1238 | // no glob: return only the file (if it exists or is a pipe) 1239 | return FetchFileWithoutGlob(path, opener, absolute_path); 1240 | } 1241 | vector previous_directories; 1242 | if (absolute_path) { 1243 | // for absolute paths, we don't start by scanning the current directory 1244 | previous_directories.push_back(splits[0]); 1245 | } else { 1246 | // If file_search_path is set, use those paths as the first glob elements 1247 | Value value; 1248 | if (opener && opener->TryGetCurrentSetting("file_search_path", value)) { 1249 | auto search_paths_str = value.ToString(); 1250 | vector search_paths = 1251 | StringUtil::Split(search_paths_str, ','); 1252 | for (const auto &search_path : search_paths) { 1253 | previous_directories.push_back(search_path); 1254 | } 1255 | } 1256 | } 1257 | 1258 | if (HasMultipleCrawl(splits)) { 1259 | throw IOException("Cannot use multiple \'**\' in one path"); 1260 | } 1261 | 1262 | idx_t start_index; 1263 | if (is_file_url) { 1264 | start_index = 1; 1265 | } else if (absolute_path) { 1266 | start_index = 1; 1267 | } else { 1268 | start_index = 0; 1269 | } 1270 | 1271 | for (idx_t i = start_index ? 
1 : 0; i < splits.size(); i++) { 1272 | bool is_last_chunk = i + 1 == splits.size(); 1273 | bool has_glob = HasGlob(splits[i]); 1274 | // if it's the last chunk we need to find files, otherwise we find 1275 | // directories not the last chunk: gather a list of all directories that 1276 | // match the glob pattern 1277 | vector result; 1278 | if (!has_glob) { 1279 | // no glob, just append as-is 1280 | if (previous_directories.empty()) { 1281 | result.push_back(splits[i]); 1282 | } else { 1283 | if (is_last_chunk) { 1284 | for (auto &prev_directory : previous_directories) { 1285 | const string filename = JoinPath(prev_directory, splits[i]); 1286 | if (FileExists(filename, opener) || 1287 | DirectoryExists(filename, opener)) { 1288 | result.push_back(filename); 1289 | } 1290 | } 1291 | } else { 1292 | for (auto &prev_directory : previous_directories) { 1293 | result.push_back(JoinPath(prev_directory, splits[i])); 1294 | } 1295 | } 1296 | } 1297 | } else { 1298 | if (IsCrawl(splits[i])) { 1299 | if (!is_last_chunk) { 1300 | result = previous_directories; 1301 | } 1302 | if (previous_directories.empty()) { 1303 | RecursiveGlobDirectories(*this, ".", result, !is_last_chunk, false); 1304 | } else { 1305 | for (auto &prev_dir : previous_directories) { 1306 | RecursiveGlobDirectories(*this, prev_dir, result, !is_last_chunk, 1307 | true); 1308 | } 1309 | } 1310 | } else { 1311 | if (previous_directories.empty()) { 1312 | // no previous directories: list in the current path 1313 | GlobFilesInternal(*this, ".", splits[i], !is_last_chunk, result, 1314 | false); 1315 | } else { 1316 | // previous directories 1317 | // we iterate over each of the previous directories, and apply the 1318 | // glob of the current directory 1319 | for (auto &prev_directory : previous_directories) { 1320 | GlobFilesInternal(*this, prev_directory, splits[i], !is_last_chunk, 1321 | result, true); 1322 | } 1323 | } 1324 | } 1325 | } 1326 | if (result.empty()) { 1327 | // no result found that matches 
the glob 1328 | // last ditch effort: search the path as a string literal 1329 | return FetchFileWithoutGlob(path, opener, absolute_path); 1330 | } 1331 | if (is_last_chunk) { 1332 | return result; 1333 | } 1334 | previous_directories = std::move(result); 1335 | } 1336 | return vector(); 1337 | } 1338 | 1339 | // Initialize 3FS resources 1340 | void InitializeThreeFS() { 1341 | //fprintf(stderr, "InitializeThreeFS\n"); 1342 | USRBIOResourceManager::instance = new USRBIOResourceManager(); 1343 | } 1344 | 1345 | void DeinitializeThreeFS() { 1346 | //fprintf(stderr, "DeinitializeThreeFS\n"); 1347 | if (USRBIOResourceManager::instance) { 1348 | delete USRBIOResourceManager::instance; 1349 | USRBIOResourceManager::instance = nullptr; 1350 | } 1351 | } 1352 | 1353 | bool ThreeFSFileSystem::CanHandleFile(const string &fpath) { 1354 | // Check if the path starts with "3fs://" or "/3fs/" 1355 | return (StringUtil::StartsWith(fpath, "3fs://") || StringUtil::StartsWith(fpath, "/3fs/")); 1356 | } 1357 | 1358 | } // namespace duckdb 1359 | -------------------------------------------------------------------------------- /src/threefs_extension.cpp: -------------------------------------------------------------------------------- 1 | #define DUCKDB_EXTENSION_MAIN 2 | #include "threefs.hpp" 3 | 4 | #include "duckdb/common/exception.hpp" 5 | #include "duckdb/function/scalar/string_functions.hpp" 6 | #include "duckdb/main/client_context.hpp" 7 | #include "duckdb/main/connection.hpp" 8 | #include "duckdb/main/database.hpp" 9 | #include "duckdb/main/extension_util.hpp" 10 | 11 | namespace duckdb { 12 | 13 | void InitializeThreeFS(); 14 | void DeinitializeThreeFS(); 15 | 16 | static void LoadInternal(DatabaseInstance &instance) { 17 | auto &fs = instance.GetFileSystem(); 18 | // Register 3FS filesystem 19 | auto &buffer_manager = instance.GetBufferManager(); 20 | try { 21 | // Verify 3FS library is available and working 22 | ThreeFSFileSystem::Verify(); 23 | 24 | // Register the file 
system with DuckDB 25 | fs.RegisterSubSystem(make_uniq(buffer_manager)); 26 | 27 | // Log success message 28 | //fprintf(stderr, "Successfully registered 3FS extension\n"); 29 | } catch (std::exception &e) { 30 | throw IOException("Failed to initialize 3FS extension: %s", e.what()); 31 | } 32 | } 33 | 34 | void ThreeFSExtension::Load(DuckDB &db) { 35 | LoadInternal(*db.instance); 36 | 37 | // 初始化3FS资源 38 | InitializeThreeFS(); 39 | 40 | // Register extension parameter options 41 | auto &config = DBConfig::GetConfig(*db.instance); 42 | 43 | // Register 3FS debug parameters 44 | config.AddExtensionOption("threefs_enable_debug_logging", "Enable verbose debug logging for 3FS operations to standard error", LogicalType::BOOLEAN); 45 | 46 | // Register 3FS connection parameters 47 | config.AddExtensionOption("threefs_cluster", "Specifies the 3FS cluster name", LogicalType::VARCHAR); 48 | config.AddExtensionOption("threefs_mount_root", "Specifies the mount root path for 3FS", LogicalType::VARCHAR); 49 | 50 | // Register USRBIO parameters 51 | config.AddExtensionOption("threefs_use_usrbio", "Whether to use USRBIO when possible", LogicalType::BOOLEAN); 52 | config.AddExtensionOption("threefs_iov_size", "Size of the shared memory buffer for USRBIO (bytes)", LogicalType::UBIGINT); 53 | config.AddExtensionOption("threefs_ior_entries", "Maximum number of IO requests that can be submitted", LogicalType::INTEGER); 54 | config.AddExtensionOption("threefs_io_depth", "IO depth for batching", LogicalType::INTEGER); 55 | config.AddExtensionOption("threefs_ior_timeout", "Timeout for IO operations (ms)", LogicalType::INTEGER); 56 | } 57 | 58 | std::string ThreeFSExtension::Name() { 59 | return "3fs"; 60 | } 61 | 62 | ThreeFSExtension::~ThreeFSExtension() { 63 | } 64 | 65 | } // namespace duckdb 66 | 67 | extern "C" { 68 | 69 | DUCKDB_EXTENSION_API void threefs_init(duckdb::DatabaseInstance &db) { 70 | duckdb::DuckDB db_wrapper(db); 71 | db_wrapper.LoadExtension(); 72 | } 73 | 74 | 
DUCKDB_EXTENSION_API const char *threefs_version() { 75 | return duckdb::DuckDB::LibraryVersion(); 76 | } 77 | } 78 | 79 | #ifndef DUCKDB_EXTENSION_MAIN 80 | #error DUCKDB_EXTENSION_MAIN not defined 81 | #endif -------------------------------------------------------------------------------- /test/check_3fs_mount.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 检查 /3fs 挂载点是否存在 4 | if [ ! -d "/3fs" ]; then 5 | echo -e "\033[1;31mERROR: /3fs mount point not found!\033[0m" 6 | echo -e "\033[1;31mPlease mount 3fs filesystem before running tests.\033[0m" 7 | echo -e "\033[1;31mExample: mount -t 3fs /3fs\033[0m" 8 | exit 1 9 | fi 10 | 11 | # 检查 /3fs 是否确实是一个挂载点,而不仅仅是一个目录 12 | if ! mountpoint -q /3fs; then 13 | echo -e "\033[1;31mERROR: /3fs exists but is not a mount point!\033[0m" 14 | echo -e "\033[1;31mPlease mount 3fs filesystem before running tests.\033[0m" 15 | echo -e "\033[1;31mExample: mount -t 3fs /3fs\033[0m" 16 | exit 1 17 | fi 18 | 19 | # 清理测试目录的函数 20 | cleanup_test_dirs() { 21 | echo -e "\033[1;33mCleaning up test directories...\033[0m" 22 | rm -rf /3fs/test_threefs 23 | } 24 | 25 | # 注册退出时的清理函数 26 | #trap cleanup_test_dirs EXIT 27 | 28 | # 创建测试目录结构 29 | mkdir -p /3fs/test_threefs 30 | mkdir -p /3fs/test_threefs/basic_test/dir1 31 | mkdir -p /3fs/test_threefs/basic_test/dir2 32 | mkdir -p /3fs/test_threefs/io_test 33 | mkdir -p /3fs/test_threefs/error_test 34 | mkdir -p /3fs/test_threefs/error_test/special@#$chars 35 | mkdir -p /3fs/test_threefs/perf_test 36 | mkdir -p /3fs/test_threefs/concurrency_test 37 | mkdir -p /3fs/test_threefs/integration_test 38 | mkdir -p /3fs/test_threefs/integration_test/partitioned/gender=M 39 | mkdir -p /3fs/test_threefs/integration_test/partitioned/gender=F 40 | 41 | echo -e "\033[1;32m✓ 3FS mount point found at /3fs, proceeding with tests\033[0m" 42 | exit 0 -------------------------------------------------------------------------------- /test/sql/localfs_io.test: 
--------------------------------------------------------------------------------
 1 | # name: test/sql/localfs_io.test
 2 | # description: Test local-filesystem I/O operations (baseline for the 3FS tests)
 3 | # group: [threefs]
 4 | 
 5 | require threefs
 6 | 
 7 | # Setup test environment variables
 8 | statement ok
 9 | SET threefs_cluster='test_cluster';
10 | 
11 | statement ok
12 | SET threefs_mount_root='/3fs';
13 | 
14 | statement ok
15 | SET threefs_use_usrbio=true;
16 | 
17 | # Install parquet extension for testing various formats
18 | statement ok
19 | INSTALL parquet;
20 | 
21 | statement ok
22 | LOAD parquet;
23 | 
24 | # Generate test data
25 | statement ok
26 | CREATE OR REPLACE TABLE large_data AS
27 | SELECT i, 'Row ' || i || repeat(' padding text', 10) as content
28 | FROM range(1, 10000) t(i);
29 | 
30 | # Write to CSV
31 | statement ok
32 | COPY large_data TO '/tmp/large_data.csv' (FORMAT CSV, HEADER);
33 | 
34 | # Verify file exists and can be read
35 | query I
36 | SELECT COUNT(*) FROM read_csv_auto('/tmp/large_data.csv');
37 | ----
38 | 9999
39 | 
40 | # Append to existing file
41 | statement ok
42 | COPY (
43 |     (SELECT * FROM read_csv_auto('/tmp/large_data.csv'))
44 |     UNION ALL
45 |     (SELECT i+10000, 'Row ' || (i+10000) || repeat(' padding text', 10) as content
46 |      FROM range(1, 5000) t(i))
47 | ) TO '/tmp/large_data.csv' (FORMAT CSV, HEADER);
48 | 
49 | # Verify appended data
50 | query I
51 | SELECT COUNT(*) FROM read_csv_auto('/tmp/large_data.csv');
52 | ----
53 | 14998
54 | 
55 | # Test partial reads
56 | query I
57 | SELECT COUNT(*) FROM (
58 |     SELECT * FROM read_csv_auto('/tmp/large_data.csv') LIMIT 100
59 | );
60 | ----
61 | 100
62 | 
63 | query I
64 | SELECT COUNT(*) FROM (
65 |     SELECT * FROM read_csv_auto('/tmp/large_data.csv') LIMIT 100 OFFSET 10000
66 | );
67 | ----
68 | 100
69 | 
70 | # Test Parquet format
71 | statement ok
72 | COPY large_data TO '/tmp/large_data.parquet' (FORMAT PARQUET);
73 | 
74 | query I
75 | SELECT COUNT(*) FROM read_parquet('/tmp/large_data.parquet');
76 | ----
77 | 9999 78 | 79 | # Install json extension for testing various formats 80 | statement ok 81 | INSTALL json; 82 | 83 | statement ok 84 | LOAD json; 85 | 86 | # Test JSON format 87 | statement ok 88 | COPY (SELECT * FROM large_data LIMIT 1000) TO '/tmp/large_data.json' (FORMAT JSON); 89 | 90 | query I 91 | SELECT COUNT(*) FROM read_json_auto('/tmp/large_data.json'); 92 | ---- 93 | 1000 94 | 95 | # Clean up temporary tables 96 | statement ok 97 | DROP TABLE IF EXISTS large_data; -------------------------------------------------------------------------------- /test/sql/threefs.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/threefs.test 2 | # description: Test 3FS extension loading and basic functionality 3 | # group: [threefs] 4 | 5 | # This will fail before loading the extension 6 | statement error 7 | SELECT '3fs://3fs/test.csv'; 8 | ---- 9 | No filesystem registered for prefix "3fs" 10 | 11 | # Ensure the test uses threefs extension 12 | require threefs 13 | 14 | # Confirm the extension is loaded and recognizes the 3fs:// protocol 15 | query I 16 | SELECT '3fs://3fs/test.csv' LIKE '3fs://%'; 17 | ---- 18 | true 19 | 20 | # Test extension configuration parameters 21 | statement ok 22 | SET threefs_cluster='test_cluster'; 23 | 24 | statement ok 25 | SET threefs_mount_root='/3fs'; 26 | 27 | statement ok 28 | SET threefs_use_usrbio=false; 29 | 30 | # Create a simple table 31 | statement ok 32 | CREATE OR REPLACE TABLE numbers AS SELECT range AS number FROM range(0, 1000); 33 | 34 | # Test writing data to 3FS 35 | statement ok 36 | COPY numbers TO '3fs://3fs/test_threefs/basic_test/test_output.csv' (FORMAT CSV, HEADER); 37 | 38 | # Test reading data from 3FS 39 | query I 40 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/basic_test/test_output.csv'); 41 | ---- 42 | 1000 43 | 44 | # Test file paths 45 | query I 46 | SELECT * FROM read_csv_auto('3fs://3fs/test_threefs/basic_test/test_output.csv') 
LIMIT 5; 47 | ---- 48 | 0 49 | 1 50 | 2 51 | 3 52 | 4 53 | 54 | # Clean up temporary tables 55 | statement ok 56 | DROP TABLE IF EXISTS numbers; -------------------------------------------------------------------------------- /test/sql/threefs_basic.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/threefs_basic.test 2 | # description: Test 3FS filesystem basic operations 3 | # group: [threefs] 4 | 5 | require threefs 6 | 7 | # Setup test environment variables 8 | statement ok 9 | SET threefs_cluster='test_cluster'; 10 | 11 | statement ok 12 | SET threefs_mount_root='/3fs'; 13 | 14 | statement ok 15 | SET threefs_use_usrbio=true; 16 | 17 | # Install parquet extension for testing different formats 18 | statement ok 19 | INSTALL parquet; 20 | 21 | statement ok 22 | LOAD parquet; 23 | 24 | # Create test tables 25 | statement ok 26 | CREATE OR REPLACE TABLE test_dirs AS 27 | SELECT * FROM (VALUES 28 | ('dir1'), 29 | ('dir2') 30 | ) t(dirname); 31 | 32 | statement ok 33 | CREATE OR REPLACE TABLE test_numbers AS 34 | SELECT * FROM (VALUES 35 | (1, 'one'), 36 | (2, 'two'), 37 | (3, 'three'), 38 | (4, 'four'), 39 | (5, 'five') 40 | ) t(id, name); 41 | 42 | # Test CSV format 43 | statement ok 44 | COPY test_numbers TO '3fs://3fs/test_threefs/basic_test/dir1/numbers.csv' (FORMAT CSV, HEADER); 45 | 46 | # Verify file exists 47 | query I 48 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/basic_test/dir1/numbers.csv'); 49 | ---- 50 | 5 51 | 52 | # Test Parquet format 53 | statement ok 54 | COPY test_numbers TO '3fs://3fs/test_threefs/basic_test/dir1/numbers.parquet' (FORMAT PARQUET); 55 | 56 | # Verify file exists 57 | query I 58 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/basic_test/dir1/numbers.parquet'); 59 | ---- 60 | 5 61 | 62 | # Install json extension for testing various formats 63 | statement ok 64 | INSTALL json; 65 | 66 | statement ok 67 | LOAD json; 68 | 69 | # Test JSON format 70 
| statement ok 71 | COPY test_numbers TO '3fs://3fs/test_threefs/basic_test/dir2/numbers.json' (FORMAT JSON); 72 | 73 | # Verify file exists 74 | query I 75 | SELECT COUNT(*) FROM read_json_auto('3fs://3fs/test_threefs/basic_test/dir2/numbers.json'); 76 | ---- 77 | 5 78 | 79 | # Test reading with filters 80 | query I 81 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/basic_test/dir1/numbers.csv') WHERE id > 2; 82 | ---- 83 | 3 84 | 85 | # Test reading with projection 86 | query I 87 | SELECT name FROM read_csv_auto('3fs://3fs/test_threefs/basic_test/dir1/numbers.csv') WHERE id = 3; 88 | ---- 89 | three 90 | 91 | # Clean up temporary tables 92 | statement ok 93 | DROP TABLE IF EXISTS test_dirs; 94 | 95 | statement ok 96 | DROP TABLE IF EXISTS test_numbers; -------------------------------------------------------------------------------- /test/sql/threefs_concurrency.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/threefs_concurrency.test 2 | # description: Test 3FS operations with concurrency 3 | # group: [threefs] 4 | 5 | require threefs 6 | 7 | # Setup test environment variables 8 | statement ok 9 | SET threefs_cluster='test_cluster'; 10 | 11 | statement ok 12 | SET threefs_mount_root='/3fs'; 13 | 14 | statement ok 15 | SET threefs_use_usrbio=true; 16 | 17 | # Prepare concurrent test data 18 | statement ok 19 | CREATE OR REPLACE TABLE concurrent_dirs AS 20 | SELECT 'thread_' || i as thread_id 21 | FROM range(1, 10) t(i); 22 | 23 | # Create data for multiple threads 24 | statement ok 25 | CREATE OR REPLACE TABLE thread_data AS 26 | SELECT 27 | '3fs://3fs/test_threefs/concurrency_test/thread_' || t.thread_id || '/file_' || f.file_id || '.csv' as filepath, 28 | t.thread_id, 29 | f.file_id, 30 | i, 31 | 'Thread ' || t.thread_id || ' File ' || f.file_id || ' Row ' || i as content 32 | FROM range(1, 10) t(thread_id), 33 | range(1, 5) f(file_id), 34 | range(1, 100) r(i); 35 | 36 | # Each thread 
writes multiple files (simulate concurrency using multi-row queries) 37 | statement ok 38 | SELECT * FROM ( 39 | SELECT 40 | COPY (SELECT i, content FROM thread_data WHERE thread_id=td.thread_id AND file_id=td.file_id) 41 | TO '3fs://3fs/test_threefs/concurrency_test/thread_' || td.thread_id || '/file_' || td.file_id || '.csv' (FORMAT CSV, HEADER) 42 | FROM (SELECT DISTINCT thread_id, file_id FROM thread_data) td 43 | ); 44 | 45 | # Concurrent file reading (simulate concurrency using multi-row queries) 46 | statement ok 47 | SELECT * FROM ( 48 | SELECT 49 | COUNT(*) 50 | FROM read_csv_auto('3fs://3fs/test_threefs/concurrency_test/thread_' || thread_id || '/file_' || file_id || '.csv') 51 | WHERE thread_id IN (SELECT thread_id FROM concurrent_dirs) AND file_id IN (1, 2, 3, 4) 52 | ); 53 | 54 | # Test concurrent operations on a shared file 55 | statement ok 56 | COPY (SELECT i, 'Shared file row ' || i as content FROM range(1, 1000) t(i)) 57 | TO '3fs://3fs/test_threefs/concurrency_test/shared_file.csv' (FORMAT CSV, HEADER); 58 | 59 | # Multiple readers on shared file 60 | statement ok 61 | SELECT * FROM ( 62 | SELECT 63 | COUNT(*) 64 | FROM read_csv_auto('3fs://3fs/test_threefs/concurrency_test/shared_file.csv') 65 | WHERE i % thread_id = 0 66 | GROUP BY thread_id 67 | ORDER BY thread_id 68 | ) t; 69 | 70 | # Test file locking with multiple readers and writers 71 | # First create a file 72 | statement ok 73 | COPY (SELECT i, 'Lock test file row ' || i as content FROM range(1, 100) t(i)) 74 | TO '3fs://3fs/test_threefs/concurrency_test/lock_test.csv' (FORMAT CSV, HEADER); 75 | 76 | # Multiple readers should succeed 77 | statement ok 78 | SELECT * FROM ( 79 | SELECT 80 | COUNT(*) 81 | FROM read_csv_auto('3fs://3fs/test_threefs/concurrency_test/lock_test.csv') 82 | WHERE i % thread_id = 0 83 | GROUP BY thread_id 84 | ORDER BY thread_id 85 | ) t; 86 | 87 | # Append to file while reading (should not conflict) 88 | statement ok 89 | COPY (SELECT i+100, 'Lock test file row 
' || (i+100) as content FROM range(1, 100) t(i)) 90 | TO '3fs://3fs/test_threefs/concurrency_test/lock_test.csv' (FORMAT CSV, HEADER, APPEND); 91 | 92 | # Verify appended data 93 | query I 94 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/concurrency_test/lock_test.csv'); 95 | ---- 96 | 199 97 | 98 | # Clean up temporary tables 99 | statement ok 100 | DROP TABLE IF EXISTS concurrent_dirs; 101 | 102 | statement ok 103 | DROP TABLE IF EXISTS thread_data; -------------------------------------------------------------------------------- /test/sql/threefs_errors.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/threefs_errors.test 2 | # description: Test 3FS error handling and edge cases 3 | # group: [threefs] 4 | 5 | require threefs 6 | 7 | # Setup test environment variables 8 | statement ok 9 | SET threefs_cluster='test_cluster'; 10 | 11 | statement ok 12 | SET threefs_mount_root='/3fs'; 13 | 14 | statement ok 15 | SET threefs_use_usrbio=true; 16 | 17 | # Test invalid paths and filenames 18 | # Non-existent file 19 | statement error 20 | SELECT * FROM read_csv_auto('3fs://3fs/test_threefs/error_test/nonexistent_file.csv'); 21 | ---- 22 | No such file or directory 23 | 24 | # Non-existent directory 25 | statement error 26 | SELECT * FROM read_csv_auto('3fs://3fs/test_threefs/nonexistent_directory/file.csv'); 27 | ---- 28 | No such file or directory 29 | 30 | # Test path with special characters 31 | statement ok 32 | CREATE OR REPLACE TABLE special_chars AS 33 | SELECT i, 'Special chars test ' || i as value 34 | FROM range(1, 10) t(i); 35 | 36 | statement ok 37 | COPY special_chars TO '3fs://3fs/test_threefs/error_test/special@#$chars/file.csv' (FORMAT CSV, HEADER); 38 | 39 | # Verify file with special chars exists 40 | query II 41 | SELECT * FROM read_csv_auto('3fs://3fs/test_threefs/error_test/special@#$chars/file.csv') LIMIT 2; 42 | ---- 43 | 1 Special chars test 1 44 | 2 Special chars test 2 45 | 
46 | # Test empty file handling 47 | statement ok 48 | COPY (SELECT * FROM special_chars LIMIT 0) TO '3fs://3fs/test_threefs/error_test/empty.csv' (FORMAT CSV, HEADER); 49 | 50 | # Read empty file (should just return header) 51 | query I 52 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/error_test/empty.csv'); 53 | ---- 54 | 0 55 | 56 | # Test long path/filename 57 | statement ok 58 | COPY special_chars TO '3fs://3fs/test_threefs/error_test/very_long_directory_name_to_test_path_length_limits_in_3fs_filesystem_implementation/very_long_filename_to_test_filename_length_limits_in_3fs_filesystem_implementation.csv' (FORMAT CSV, HEADER); 59 | 60 | # Verify long path file exists 61 | query I 62 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/error_test/very_long_directory_name_to_test_path_length_limits_in_3fs_filesystem_implementation/very_long_filename_to_test_filename_length_limits_in_3fs_filesystem_implementation.csv'); 63 | ---- 64 | 9 65 | 66 | # Test large file creation 67 | statement ok 68 | CREATE OR REPLACE TABLE large_test AS 69 | SELECT i, repeat('Large file test data with some padding to increase size ', 10) || i as large_text 70 | FROM range(1, 10000) t(i); 71 | 72 | statement ok 73 | COPY large_test TO '3fs://3fs/test_threefs/error_test/large_file.csv' (FORMAT CSV, HEADER); 74 | 75 | # Verify large file exists 76 | query I 77 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/error_test/large_file.csv'); 78 | ---- 79 | 9999 80 | 81 | # Test race conditions 82 | statement ok 83 | CREATE OR REPLACE TABLE race_test AS SELECT i FROM range(1, 1000) t(i); 84 | 85 | # Write to same file from multiple statements 86 | statement ok 87 | COPY (SELECT * FROM race_test WHERE i <= 500) TO '3fs://3fs/test_threefs/error_test/race.csv' (FORMAT CSV, HEADER); 88 | 89 | statement ok 90 | COPY (SELECT * FROM race_test WHERE i > 500) TO '3fs://3fs/test_threefs/error_test/race.csv' (FORMAT CSV, HEADER, APPEND); 91 | 92 | # Verify race file 
exists with correct count 93 | query I 94 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/error_test/race.csv'); 95 | ---- 96 | 999 97 | 98 | # Clean up temporary tables 99 | statement ok 100 | DROP TABLE IF EXISTS special_chars; 101 | 102 | statement ok 103 | DROP TABLE IF EXISTS large_test; 104 | 105 | statement ok 106 | DROP TABLE IF EXISTS race_test; -------------------------------------------------------------------------------- /test/sql/threefs_integration.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/threefs_integration.test 2 | # description: Test 3FS integration with DuckDB 3 | # group: [threefs] 4 | 5 | require threefs 6 | 7 | # Setup test environment variables 8 | statement ok 9 | SET threefs_cluster='test_cluster'; 10 | 11 | statement ok 12 | SET threefs_mount_root='/3fs'; 13 | 14 | statement ok 15 | SET threefs_use_usrbio=true; 16 | 17 | # Install required extensions 18 | statement ok 19 | INSTALL parquet; 20 | 21 | statement ok 22 | LOAD parquet; 23 | 24 | statement ok 25 | INSTALL icu; 26 | 27 | statement ok 28 | LOAD icu; 29 | 30 | # Prepare test data 31 | statement ok 32 | CREATE OR REPLACE TABLE customers AS 33 | SELECT 34 | i as customer_id, 35 | 'Customer ' || i as name, 36 | CASE WHEN i % 2 = 0 THEN 'M' ELSE 'F' END as gender, 37 | (RANDOM() * 70 + 18)::INTEGER as age, 38 | 'City ' || (i % 10 + 1) as city 39 | FROM range(1, 1000) t(i); 40 | 41 | statement ok 42 | CREATE OR REPLACE TABLE orders AS 43 | SELECT 44 | i as order_id, 45 | (RANDOM() * 999 + 1)::INTEGER as customer_id, 46 | 'Product ' || (i % 100 + 1) as product, 47 | (RANDOM() * 1000)::DECIMAL(10,2) as amount, 48 | '2023-01-01'::DATE - INTERVAL ((RANDOM() * 365)::INTEGER) DAY as order_date 49 | FROM range(1, 5000) t(i); 50 | 51 | # Save test data to 3FS in Parquet format 52 | statement ok 53 | COPY customers TO '3fs://3fs/test_threefs/integration_test/customers.parquet' (FORMAT PARQUET); 54 | 55 | statement ok 
56 | COPY orders TO '3fs://3fs/test_threefs/integration_test/orders.parquet' (FORMAT PARQUET); 57 | 58 | # Test basic operations 59 | # Count records 60 | query I 61 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet'); 62 | ---- 63 | 999 64 | 65 | query I 66 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/integration_test/orders.parquet'); 67 | ---- 68 | 4999 69 | 70 | # Test filtering 71 | query I 72 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') 73 | WHERE gender = 'F'; 74 | ---- 75 | 499 76 | 77 | # Test projection 78 | query I 79 | SELECT AVG(age) FROM read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') 80 | WHERE gender = 'M'; 81 | ---- 82 | 53.5 83 | 84 | # Test joins 85 | query I 86 | SELECT 87 | COUNT(*) 88 | FROM read_parquet('3fs://3fs/test_threefs/integration_test/orders.parquet') o 89 | JOIN read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') c 90 | ON o.customer_id = c.customer_id 91 | WHERE c.gender = 'F'; 92 | ---- 93 | 2500 94 | 95 | # Test complex operations 96 | # Aggregation 97 | query II 98 | SELECT 99 | gender, 100 | ROUND(AVG(amount), 2) as avg_order_amount 101 | FROM read_parquet('3fs://3fs/test_threefs/integration_test/orders.parquet') o 102 | JOIN read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') c 103 | ON o.customer_id = c.customer_id 104 | GROUP BY gender 105 | ORDER BY gender; 106 | ---- 107 | F 500.00 108 | M 500.00 109 | 110 | # Window functions 111 | query III 112 | SELECT 113 | c.city, 114 | COUNT(*) as order_count, 115 | ROUND(SUM(amount), 2) as total_amount 116 | FROM read_parquet('3fs://3fs/test_threefs/integration_test/orders.parquet') o 117 | JOIN read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') c 118 | ON o.customer_id = c.customer_id 119 | GROUP BY c.city 120 | ORDER BY total_amount DESC 121 | LIMIT 5; 122 | ---- 123 | City 1 500 
250000.00 124 | City 2 500 250000.00 125 | City 3 500 250000.00 126 | City 4 500 250000.00 127 | City 5 500 250000.00 128 | 129 | # Test partitioning by gender 130 | statement ok 131 | COPY (SELECT * FROM customers WHERE gender = 'M') 132 | TO '3fs://3fs/test_threefs/integration_test/gender=M/customers.parquet' (FORMAT PARQUET); 133 | 134 | statement ok 135 | COPY (SELECT * FROM customers WHERE gender = 'F') 136 | TO '3fs://3fs/test_threefs/integration_test/gender=F/customers.parquet' (FORMAT PARQUET); 137 | 138 | # Test reading partitioned data 139 | query I 140 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/integration_test/gender=M/customers.parquet'); 141 | ---- 142 | 500 143 | 144 | query I 145 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/integration_test/gender=F/customers.parquet'); 146 | ---- 147 | 499 148 | 149 | # Test UDFs with 3FS 150 | statement ok 151 | CREATE OR REPLACE FUNCTION age_category(age INTEGER) RETURNS VARCHAR AS 152 | CASE 153 | WHEN age < 30 THEN 'Young' 154 | WHEN age < 60 THEN 'Middle-aged' 155 | ELSE 'Senior' 156 | END; 157 | 158 | query II 159 | SELECT 160 | age_category(age) as category, 161 | COUNT(*) as count 162 | FROM read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') 163 | GROUP BY category 164 | ORDER BY category; 165 | ---- 166 | Middle-aged 499 167 | Senior 250 168 | Young 250 169 | 170 | # Test views on 3FS data 171 | statement ok 172 | CREATE OR REPLACE VIEW customer_stats AS 173 | SELECT 174 | gender, 175 | city, 176 | COUNT(*) as customer_count, 177 | AVG(age) as avg_age 178 | FROM read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') 179 | GROUP BY gender, city; 180 | 181 | query IIII 182 | SELECT * FROM customer_stats 183 | ORDER BY gender, city 184 | LIMIT 5; 185 | ---- 186 | F City 1 50 53.5 187 | F City 10 50 53.5 188 | F City 2 50 53.5 189 | F City 3 50 53.5 190 | F City 4 50 53.5 191 | 192 | # Test temporary tables with 3FS data 193 | statement 
ok 194 | CREATE OR REPLACE TEMP TABLE high_value_orders AS 195 | SELECT 196 | o.order_id, 197 | c.customer_id, 198 | c.name, 199 | o.product, 200 | o.amount 201 | FROM read_parquet('3fs://3fs/test_threefs/integration_test/orders.parquet') o 202 | JOIN read_parquet('3fs://3fs/test_threefs/integration_test/customers.parquet') c 203 | ON o.customer_id = c.customer_id 204 | WHERE o.amount > 900; 205 | 206 | query I 207 | SELECT COUNT(*) FROM high_value_orders; 208 | ---- 209 | 500 210 | 211 | # Clean up 212 | statement ok 213 | DROP VIEW IF EXISTS customer_stats; 214 | 215 | statement ok 216 | DROP TABLE IF EXISTS high_value_orders; 217 | 218 | statement ok 219 | DROP TABLE IF EXISTS customers; 220 | 221 | statement ok 222 | DROP TABLE IF EXISTS orders; -------------------------------------------------------------------------------- /test/sql/threefs_io.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/threefs_io.test 2 | # description: Test 3FS filesystem I/O operations 3 | # group: [threefs] 4 | 5 | require threefs 6 | 7 | # Setup test environment variables 8 | statement ok 9 | SET threefs_cluster='test_cluster'; 10 | 11 | statement ok 12 | SET threefs_mount_root='/3fs'; 13 | 14 | statement ok 15 | SET threefs_use_usrbio=true; 16 | 17 | # Install parquet extension for testing various formats 18 | statement ok 19 | INSTALL parquet; 20 | 21 | statement ok 22 | LOAD parquet; 23 | 24 | # Generate test data 25 | statement ok 26 | CREATE OR REPLACE TABLE large_data AS 27 | SELECT i, 'Row ' || i || repeat(' padding text', 10) as content 28 | FROM range(1, 10000) t(i); 29 | 30 | # Write to CSV 31 | statement ok 32 | COPY large_data TO '3fs://3fs/test_threefs/io_test/large_data.csv' (FORMAT CSV, HEADER); 33 | 34 | # Verify file exists and can be read 35 | query I 36 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/io_test/large_data.csv'); 37 | ---- 38 | 9999 39 | 40 | # Append to existing file 41 | 
statement ok 42 | COPY ( 43 | (SELECT * FROM read_csv_auto('3fs://3fs/test_threefs/io_test/large_data.csv')) 44 | UNION ALL 45 | (SELECT i+10000, 'Row ' || (i+10000) || repeat(' padding text', 10) as content 46 | FROM range(1, 5000) t(i)) 47 | ) TO '3fs://3fs/test_threefs/io_test/large_data.csv' (FORMAT CSV, HEADER); 48 | 49 | # Verify rewritten data (COPY TO overwrites the file; 9999 + 4999 rows) 50 | query I 51 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/io_test/large_data.csv'); 52 | ---- 53 | 14998 54 | 55 | # Test partial reads 56 | query I 57 | SELECT COUNT(*) FROM ( 58 | SELECT * FROM read_csv_auto('3fs://3fs/test_threefs/io_test/large_data.csv') LIMIT 100 59 | ); 60 | ---- 61 | 100 62 | 63 | query I 64 | SELECT COUNT(*) FROM ( 65 | SELECT * FROM read_csv_auto('3fs://3fs/test_threefs/io_test/large_data.csv') LIMIT 100 OFFSET 10000 66 | ); 67 | ---- 68 | 100 69 | 70 | # Test Parquet format 71 | statement ok 72 | COPY large_data TO '3fs://3fs/test_threefs/io_test/large_data.parquet' (FORMAT PARQUET); 73 | 74 | query I 75 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/io_test/large_data.parquet'); 76 | ---- 77 | 9999 78 | 79 | # Install json extension for testing various formats 80 | statement ok 81 | INSTALL json; 82 | 83 | statement ok 84 | LOAD json; 85 | 86 | # Test JSON format 87 | statement ok 88 | COPY (SELECT * FROM large_data LIMIT 1000) TO '3fs://3fs/test_threefs/io_test/large_data.json' (FORMAT JSON); 89 | 90 | query I 91 | SELECT COUNT(*) FROM read_json_auto('3fs://3fs/test_threefs/io_test/large_data.json'); 92 | ---- 93 | 1000 94 | 95 | # Clean up temporary tables 96 | statement ok 97 | DROP TABLE IF EXISTS large_data; -------------------------------------------------------------------------------- /test/sql/threefs_performance.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/threefs_performance.test 2 | # description: Test 3FS filesystem performance 3 | # group: [threefs] 4 | 5 | require threefs 6 | 7 | # 
Setup test environment variables 8 | statement ok 9 | SET threefs_cluster='test_cluster'; 10 | 11 | statement ok 12 | SET threefs_mount_root='/3fs'; 13 | 14 | statement ok 15 | SET threefs_use_usrbio=true; 16 | 17 | # Install parquet extension for testing various formats 18 | statement ok 19 | INSTALL parquet; 20 | 21 | statement ok 22 | LOAD parquet; 23 | 24 | # Generate test data for performance testing 25 | statement ok 26 | CREATE OR REPLACE TABLE medium_data AS 27 | SELECT i, 'Row ' || i || repeat(' padding text', 10) as content 28 | FROM range(1, 100000) t(i); 29 | 30 | # Generate local file for comparison 31 | statement ok 32 | COPY medium_data TO '/tmp/local_medium.csv' (FORMAT CSV, HEADER); 33 | 34 | # Generate 3FS file 35 | statement ok 36 | COPY medium_data TO '3fs://3fs/test_threefs/perf_test/medium.csv' (FORMAT CSV, HEADER); 37 | 38 | # Test file reading performance 39 | # Local file reading 40 | statement ok 41 | SELECT COUNT(*) FROM read_csv_auto('/tmp/local_medium.csv'); 42 | 43 | # 3FS file reading 44 | statement ok 45 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/perf_test/medium.csv'); 46 | 47 | # Test different buffer sizes 48 | # Small buffer (1KB) 49 | statement ok 50 | SET threefs_buffer_size='1KB'; 51 | 52 | statement ok 53 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/perf_test/medium.csv'); 54 | 55 | # Medium buffer (64KB) 56 | statement ok 57 | SET threefs_buffer_size='64KB'; 58 | 59 | statement ok 60 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/perf_test/medium.csv'); 61 | 62 | # Large buffer (1MB) 63 | statement ok 64 | SET threefs_buffer_size='1MB'; 65 | 66 | statement ok 67 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/perf_test/medium.csv'); 68 | 69 | # Test Parquet format performance 70 | statement ok 71 | COPY medium_data TO '3fs://3fs/test_threefs/perf_test/medium.parquet' (FORMAT PARQUET); 72 | 73 | # Local parquet reading 74 | statement ok 75 | COPY medium_data TO 
'/tmp/local_medium.parquet' (FORMAT PARQUET); 76 | 77 | statement ok 78 | SELECT COUNT(*) FROM read_parquet('/tmp/local_medium.parquet'); 79 | 80 | # 3FS parquet reading 81 | statement ok 82 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/perf_test/medium.parquet'); 83 | 84 | # Test with filtering (pushdown) 85 | statement ok 86 | SELECT COUNT(*) FROM read_csv_auto('3fs://3fs/test_threefs/perf_test/medium.csv') WHERE i > 50000; 87 | 88 | statement ok 89 | SELECT COUNT(*) FROM read_parquet('3fs://3fs/test_threefs/perf_test/medium.parquet') WHERE i > 50000; 90 | 91 | # Test with projection 92 | statement ok 93 | SELECT AVG(LENGTH(content)) FROM read_csv_auto('3fs://3fs/test_threefs/perf_test/medium.csv') WHERE i % 1000 = 0; 94 | 95 | statement ok 96 | SELECT AVG(LENGTH(content)) FROM read_parquet('3fs://3fs/test_threefs/perf_test/medium.parquet') WHERE i % 1000 = 0; 97 | 98 | # Clean up temporary tables 99 | statement ok 100 | DROP TABLE IF EXISTS medium_data; --------------------------------------------------------------------------------