├── .github
│   ├── FUNDING.yml
│   ├── dependabot.yml
│   └── workflows
│       └── ci.yml
├── .gitignore
├── rust-toolchain.toml
├── rustfmt.toml
├── CONTRIBUTING.md
├── src
│   ├── readdir.rs
│   ├── lib.rs
│   ├── async_file.rs
│   ├── file.rs
│   ├── metadata.rs
│   ├── open_options.rs
│   └── client.rs
├── Cargo.toml
├── .taplo.toml
├── README.md
└── tests
    └── main.rs
/.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [Xuanwo] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | /target 3 | .env 4 | .idea 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "stable" 3 | components = ["rustfmt", "clippy"] 4 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | edition = "2021" 2 | reorder_imports = true 3 | 4 | # format_code_in_doc_comments = true 5 | # group_imports = "StdExternalCrate" 6 | # imports_granularity = "Module" 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "daily" 8 | 9 | # Maintain dependencies for rust 10 | - package-ecosystem: "cargo" 11 | directory: "/" 12 | schedule: 13 | interval: "daily" 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Get Started 4 | 5 | This is a Rust project, so [rustup](https://rustup.rs/) is the best place to start. 6 | 7 | Since it is pure Rust, only `cargo` is needed for development. 8 | 9 | - `cargo check` to analyze the current package and report errors. 10 | - `cargo build` to compile the current package. 11 | - `cargo clippy` to catch common mistakes and improve code. 12 | - `cargo test` to run unit tests. 13 | - `cargo bench` to run benchmark tests.
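Most integration tests are gated behind environment variables (see `tests/main.rs` and `.github/workflows/ci.yml`). A minimal sketch of a local test run; the values below mirror the ones used in CI and are assumptions about your local setup:

```shell
export HDRS_TEST=on            # opt in to the gated tests
export HDRS_NAMENODE=default   # or e.g. hdfs://127.0.0.1:9000
export HDRS_WORKDIR=/tmp/hdrs/ # scratch directory used by the tests
export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)
cargo test --features async_file -- --nocapture
```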
14 | -------------------------------------------------------------------------------- /src/readdir.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Deref, DerefMut}; 2 | use std::vec::IntoIter; 3 | 4 | use crate::Metadata; 5 | 6 | /// Readdir is an iterator of [`Metadata`], returned by [`read_dir`][crate::Client::read_dir] 7 | #[derive(Debug)] 8 | pub struct Readdir { 9 | inner: IntoIter<Metadata>, 10 | } 11 | 12 | impl Readdir { 13 | pub fn into_inner(self) -> IntoIter<Metadata> { 14 | self.inner 15 | } 16 | } 17 | 18 | impl From<Vec<Metadata>> for Readdir { 19 | fn from(v: Vec<Metadata>) -> Self { 20 | Readdir { 21 | inner: v.into_iter(), 22 | } 23 | } 24 | } 25 | 26 | impl Deref for Readdir { 27 | type Target = IntoIter<Metadata>; 28 | 29 | fn deref(&self) -> &Self::Target { 30 | &self.inner 31 | } 32 | } 33 | 34 | impl DerefMut for Readdir { 35 | fn deref_mut(&mut self) -> &mut Self::Target { 36 | &mut self.inner 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Xuanwo <github@xuanwo.io>"] 3 | categories = ["command-line-utilities"] 4 | description = "Rust native client to hdfs" 5 | documentation = "https://docs.rs/hdrs" 6 | edition = "2021" 7 | license = "Apache-2.0" 8 | name = "hdrs" 9 | repository = "https://github.com/Xuanwo/hdrs" 10 | version = "0.3.2" 11 | 12 | [package.metadata.docs.rs] 13 | all-features = true 14 | 15 | [features] 16 | async_file = ["dep:futures", "dep:blocking"] 17 | vendored = ["hdfs-sys/vendored"] 18 | 19 | [dependencies] 20 | blocking = { version = "1.3.0", optional = true } 21 | errno = "0.3" 22 | futures = { version = "0.3", optional = true } 23 | hdfs-sys = "0.3" 24 | libc = "0.2" 25 | log = "0.4" 26 | 27 | [dev-dependencies] 28 | anyhow = "1" 29 | dotenv = "0.15" 30 | env_logger = "0.11" 31 | futures = "0.3" 32 | rand = "0.8" 33 | tokio = { version = "1", features = ["full"] } 34 | uuid = { version = "1", features = ["v4", "fast-rng", "macro-diagnostics"] } 35 | -------------------------------------------------------------------------------- /.taplo.toml: -------------------------------------------------------------------------------- 1 | include = ["Cargo.toml", "**/*.toml"] 2 | 3 | [formatting] 4 | # Align consecutive entries vertically. 5 | align_entries = false 6 | # Append trailing commas for multi-line arrays. 7 | array_trailing_comma = true 8 | # Expand arrays to multiple lines that exceed the maximum column width. 9 | array_auto_expand = true 10 | # Collapse arrays that don't exceed the maximum column width and don't contain comments. 11 | array_auto_collapse = true 12 | # Omit white space padding from single-line arrays 13 | compact_arrays = true 14 | # Omit white space padding from the start and end of inline tables. 15 | compact_inline_tables = false 16 | # Maximum column width in characters, affects array expansion and collapse, this doesn't take whitespace into account. 17 | # Note that this is not set in stone, and works on a best-effort basis. 18 | column_width = 80 19 | # Indent based on tables and arrays of tables and their subtables, subtables out of order are not indented. 20 | indent_tables = false 21 | # The substring that is used for indentation, should be tabs or spaces (but technically can be anything). 22 | indent_string = ' ' 23 | # Add trailing newline at the end of the file if not present.
24 | trailing_newline = true 25 | # Alphabetically reorder keys that are not separated by empty lines. 26 | reorder_keys = true 27 | # Maximum amount of allowed consecutive blank lines. This does not affect the whitespace at the end of the document, as it is always stripped. 28 | allowed_blank_lines = 2 29 | # Use CRLF for line endings. 30 | crlf = false 31 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! hdrs is an HDFS Native Client in Rust based on [hdfs-sys](https://github.com/Xuanwo/hdfs-sys). 2 | //! 3 | //! # Examples 4 | //! 5 | //! ```no_run 6 | //! use std::io::{Read, Write}; 7 | //! 8 | //! use hdrs::Client; 9 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> { 10 | //! use hdrs::ClientBuilder; 11 | //! let fs = ClientBuilder::new("default").connect()?; 12 | //! 13 | //! let mut f = fs 14 | //!     .open_file() 15 | //!     .write(true) 16 | //!     .create(true) 17 | //!     .open("/tmp/hello.txt")?; 18 | //! let n = f.write("Hello, World!".as_bytes())?; 19 | //! 20 | //! let mut f = fs.open_file().read(true).open("/tmp/hello.txt")?; 21 | //! let mut buf = vec![0; 1024]; 22 | //! let n = f.read(&mut buf)?; 23 | //! 24 | //! let _ = fs.remove_file("/tmp/hello.txt")?; 25 | //! # Ok(()) 26 | //! # } 27 | //! ``` 28 | //! 29 | //! # Features 30 | //! 31 | //! - `async_file`: Enable async operation support 32 | //! - `vendored`: Skip the lib loading logic and compile and statically link `libhdfs` 33 | //! 34 | //! # Compiletime 35 | //! `hdrs` depends on [hdfs-sys](https://github.com/Xuanwo/hdfs-sys) which links `libjvm` to work. 36 | //! 37 | //! Please make sure `JAVA_HOME` is set correctly: 38 | //! 39 | //! ```shell 40 | //! export JAVA_HOME=/path/to/java 41 | //! export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH} 42 | //! ``` 43 | //! 44 | //! - Enable the `vendored` feature to compile `libhdfs` and link it statically. 45 | //! - Specify `HDFS_LIB_DIR` or `HADOOP_HOME` to load `libhdfs` from that path instead of compiling it. 46 | //! - Specify `HDFS_STATIC=1` to link `libhdfs` statically. 47 | //! - And finally, we will fall back to compiling `libhdfs` and linking it statically. 48 | //! 49 | //! # Runtime 50 | //! 51 | //! `hdrs` depends on [hdfs-sys](https://github.com/Xuanwo/hdfs-sys) which uses JNI to call functions provided by the jars shipped with hadoop releases. 52 | //! 53 | //! Please also make sure `HADOOP_HOME`, `LD_LIBRARY_PATH` and `CLASSPATH` are set correctly during runtime: 54 | //! 55 | //! ```shell 56 | //! export HADOOP_HOME=/path/to/hadoop 57 | //! export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH} 58 | //! export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) 59 | //! ``` 60 | //! 61 | //! If `libhdfs` is configured to link dynamically, please also add `${HADOOP_HOME}/lib/native` to `LD_LIBRARY_PATH` to make sure the linker can find `libhdfs.so`: 62 | //! 63 | //! ```shell 64 | //! export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native:${LD_LIBRARY_PATH} 65 | //! ```
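//!
//! As a quick sanity check (assuming a stock hadoop layout), `ls ${HADOOP_HOME}/lib/native/libhdfs.so` should succeed before the process starts.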
66 | 67 | mod client; 68 | pub use client::{Client, ClientBuilder}; 69 | 70 | mod file; 71 | pub use file::File; 72 | 73 | #[cfg(feature = "async_file")] 74 | mod async_file; 75 | #[cfg(feature = "async_file")] 76 | pub use async_file::AsyncFile; 77 | 78 | mod open_options; 79 | pub use open_options::OpenOptions; 80 | 81 | mod metadata; 82 | pub use metadata::Metadata; 83 | 84 | mod readdir; 85 | pub use readdir::Readdir; 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hdrs   [![Build Status]][actions] [![Latest Version]][crates.io] 2 | 3 | [Build Status]: https://img.shields.io/github/actions/workflow/status/Xuanwo/hdrs/ci.yml?branch=main 4 | [actions]: https://github.com/Xuanwo/hdrs/actions?query=branch%3Amain 5 | [Latest Version]: https://img.shields.io/crates/v/hdrs.svg 6 | [crates.io]: https://crates.io/crates/hdrs 7 | 8 | HDFS Native Client in Rust based on [hdfs-sys](https://github.com/Xuanwo/hdfs-sys). 9 | 10 | ## Quick Start 11 | 12 | ```rust 13 | use std::io::{Read, Write}; 14 | 15 | use hdrs::Client; 16 | 17 | fn main() -> Result<(), Box<dyn std::error::Error>> { 18 | let fs = Client::connect("hdfs://127.0.0.1:9000")?; 19 | 20 | let mut f = fs.open_file().write(true).create(true).open("/tmp/hello.txt")?; 21 | let n = f.write("Hello, World!".as_bytes())?; 22 | 23 | let mut f = fs.open_file().read(true).open("/tmp/hello.txt")?; 24 | let mut buf = vec![0; 1024]; 25 | let n = f.read(&mut buf)?; 26 | 27 | let _ = fs.remove_file("/tmp/hello.txt")?; 28 | 29 | Ok(()) 30 | } 31 | ``` 32 | 33 | ## Compiletime 34 | 35 | `hdrs` depends on [hdfs-sys](https://github.com/Xuanwo/hdfs-sys) which links `libjvm` to work. 36 | 37 | Please make sure `JAVA_HOME` is set correctly: 38 | 39 | ```shell 40 | export JAVA_HOME=/path/to/java 41 | export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH} 42 | ``` 43 | 44 | - Enable the `vendored` feature to compile `libhdfs` and link it statically. 45 | - Specify `HDFS_LIB_DIR` or `HADOOP_HOME` to load `libhdfs` from that path instead of compiling it. 46 | - Specify `HDFS_STATIC=1` to link `libhdfs` statically. 47 | - And finally, we will fall back to compiling `libhdfs` and linking it statically. 48 | 49 | ## Runtime 50 | 51 | `hdrs` depends on [hdfs-sys](https://github.com/Xuanwo/hdfs-sys) which uses JNI to call functions provided by the jars shipped with hadoop releases. 52 | 53 | Please also make sure `HADOOP_HOME`, `LD_LIBRARY_PATH` and `CLASSPATH` are set correctly during runtime: 54 | 55 | ```shell 56 | export HADOOP_HOME=/path/to/hadoop 57 | export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH} 58 | export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) 59 | ``` 60 | 61 | If `libhdfs` is configured to link dynamically, please also add `${HADOOP_HOME}/lib/native` to `LD_LIBRARY_PATH` to make sure the linker can find `libhdfs.so`: 62 | 63 | ```shell 64 | export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native:${LD_LIBRARY_PATH} 65 | ``` 66 | 67 | ## Version Requirement 68 | 69 | `hdrs` requires at least hadoop 2.6 to work. 70 | 71 | hadoop 2.2 doesn't handle FileNotFound correctly, and releases older than 2.6 don't handle errno correctly: in those versions, hadoop sets errno to `3` if the input path is an empty dir. 72 | 73 | ## Contributing 74 | 75 | Check out the [CONTRIBUTING.md](./CONTRIBUTING.md) guide for more details on getting started with contributing to this project.
76 | 77 | ## Getting help 78 | 79 | Submit [issues](https://github.com/Xuanwo/hdrs/issues/new/choose) for bug reports, or ask questions in [discussions](https://github.com/Xuanwo/hdrs/discussions/new?category=q-a). 80 | 81 | ## Acknowledgment 82 | 83 | This project is highly inspired by [clang-sys](https://github.com/KyleMayes/clang-sys). 84 | 85 | #### License 86 | 87 | 88 | Licensed under Apache License, Version 2.0. 89 | 90 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | check: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Format 22 | run: cargo fmt --all -- --check 23 | 24 | - name: Clippy with all features 25 | run: cargo clippy --all-features --all-targets -- -D warnings 26 | 27 | test-default: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v4 31 | 32 | - name: Setup java env 33 | uses: actions/setup-java@v4 34 | with: 35 | distribution: temurin 36 | java-version: "11" 37 | - name: Setup hadoop env 38 | shell: bash 39 | run: | 40 | curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner 41 | echo "HADOOP_HOME=/home/runner/hadoop-3.3.5" >> $GITHUB_ENV 42 | 43 | - name: Test 44 | shell: bash 45 | run: | 46 | export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) 47 | cargo test --features async_file -- --nocapture 48 | env: 49 | LD_LIBRARY_PATH: ${{ env.JAVA_HOME }}/lib/server:${{ env.HADOOP_HOME }}/lib/native 50 | HDRS_TEST: on 51 | HDRS_NAMENODE: default 52 | HDRS_WORKDIR: /tmp/hdrs/ 53 | 54 | test-cluster: 55 | runs-on: ubuntu-latest 56 | steps: 57 | - uses: actions/checkout@v4 58 | 59 | - name: Configure Hdfs 60 | # namenode will use ports: 9870, 9000, 8020 61 | # datanode will use ports: 9864 62 | run: | 63 | docker run -d \ 64 | --name namenode \ 65 | --network host \ 66 | -e CLUSTER_NAME=test \ 67 | -e WEBHDFS_CONF_dfs_webhdfs_enabled=true \ 68 | -e CORE_CONF_hadoop_http_staticuser_user=root \ 69 | -e HDFS_CONF_dfs_permissions_enabled=false \ 70 | bde2020/hadoop-namenode:2.0.0-hadoop3.1.3-java8 71 | 72 | docker run -d \ 73 | --name datanode \ 74 | --network host \ 75 | -e CLUSTER_NAME=test \ 76 | -e WEBHDFS_CONF_dfs_webhdfs_enabled=true \ 77 | -e CORE_CONF_hadoop_http_staticuser_user=root \ 78 | -e HDFS_CONF_dfs_permissions_enabled=false \ 79 | bde2020/hadoop-datanode:2.0.0-hadoop3.1.3-java8 80 | 81 | curl --retry 30 --retry-delay 1 --retry-connrefused http://localhost:9870 82 | 83 | - name: Setup java env 84 | uses: actions/setup-java@v4 85 | with: 86 | distribution: temurin 87 | java-version: "11" 88 | - name: Setup hadoop env 89 | shell: bash 90 | run: | 91 | curl -LsSf https://archive.apache.org/dist/hadoop/common/hadoop-3.1.3/hadoop-3.1.3.tar.gz | tar zxf - -C /home/runner 92 | echo "HADOOP_HOME=/home/runner/hadoop-3.1.3" >> $GITHUB_ENV 93 | 94 | - name: Test 95 | shell: bash 96 | run: | 97 | export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) 98 | cargo test --features async_file -- --nocapture 99 | env: 100 | LD_LIBRARY_PATH: ${{ env.JAVA_HOME }}/lib/server:${{ env.HADOOP_HOME }}/lib/native 101 | HDRS_TEST: on 102 | HDRS_INTEGRATED_TEST: on
103 | HDRS_NAMENODE: hdfs://localhost:8020 104 | HDRS_WORKDIR: /tmp/hdrs/ 105 | 106 | test-vendored: 107 | runs-on: ubuntu-latest 108 | steps: 109 | - uses: actions/checkout@v4 110 | 111 | - name: Setup java env 112 | uses: actions/setup-java@v4 113 | with: 114 | distribution: temurin 115 | java-version: "11" 116 | - name: Setup hadoop env 117 | shell: bash 118 | run: | 119 | curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner 120 | echo "HADOOP_HOME=/home/runner/hadoop-3.3.5" >> $GITHUB_ENV 121 | 122 | - name: Test 123 | shell: bash 124 | run: | 125 | export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) 126 | cargo test --features async_file,vendored -- --nocapture 127 | env: 128 | # If vendored has been enabled, we don't need to load native libs 129 | LD_LIBRARY_PATH: ${{ env.JAVA_HOME }}/lib/server 130 | HDRS_TEST: on 131 | HDRS_NAMENODE: default 132 | HDRS_WORKDIR: /tmp/hdrs/ 133 | -------------------------------------------------------------------------------- /src/async_file.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Result, Seek, SeekFrom, Write}; 2 | use std::pin::Pin; 3 | use std::sync::Arc; 4 | use std::task::{Context, Poll}; 5 | 6 | use blocking::Unblock; 7 | use futures::lock::Mutex; 8 | use futures::{ready, AsyncSeek}; 9 | 10 | use crate::File; 11 | 12 | /// A wrapper around `Arc<File>` that implements `Read`, `Write`, and `Seek`. 13 | struct ArcFile(Arc<File>); 14 | 15 | impl Read for ArcFile { 16 | fn read(&mut self, buf: &mut [u8]) -> Result<usize> { 17 | (&*self.0).read(buf) 18 | } 19 | } 20 | 21 | impl Write for ArcFile { 22 | fn write(&mut self, buf: &[u8]) -> Result<usize> { 23 | (&*self.0).write(buf) 24 | } 25 | 26 | fn flush(&mut self) -> Result<()> { 27 | (&*self.0).flush() 28 | } 29 | } 30 | 31 | impl Seek for ArcFile { 32 | fn seek(&mut self, pos: SeekFrom) -> Result<u64> { 33 | (&*self.0).seek(pos) 34 | } 35 | } 36 | 37 | /// Async version of file. 38 | /// 39 | /// Most code is inspired by [async-fs](https://github.com/smol-rs/async-fs). 40 | pub struct AsyncFile { 41 | /// Always accessible reference to the file. 42 | /// 43 | /// Not used for now, just saved for future use. 44 | _file: Arc<File>, 45 | 46 | /// Performs blocking I/O operations on a thread pool. 47 | unblock: Mutex<Unblock<ArcFile>>, 48 | 49 | /// Logical file cursor, tracked when reading from the file. 50 | /// 51 | /// This will be set to an error if the file is not seekable. 52 | read_pos: Option<Result<u64>>, 53 | 54 | /// Set to `true` if the file needs flushing. 55 | is_dirty: bool, 56 | } 57 | 58 | impl AsyncFile { 59 | /// Creates an async file from a blocking file. 60 | pub(crate) fn new(inner: File, is_dirty: bool) -> AsyncFile { 61 | let file = Arc::new(inner); 62 | let unblock = Mutex::new(Unblock::new(ArcFile(file.clone()))); 63 | let read_pos = None; 64 | AsyncFile { 65 | _file: file, 66 | unblock, 67 | read_pos, 68 | is_dirty, 69 | } 70 | } 71 | 72 | /// Repositions the cursor after reading. 73 | /// 74 | /// When reading from a file, actual file reads run asynchronously in the background, which 75 | /// means the real file cursor is usually ahead of the logical cursor, and the data between 76 | /// them is buffered in memory. This kind of buffering is an important optimization. 77 | /// 78 | /// After reading ends, if we decide to perform a write or a seek operation, the real file 79 | /// cursor must first be repositioned back to the correct logical position.
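/// For example, after an 8 KiB read the logical cursor sits at 8192, while the
/// background thread may already have read ahead to a larger real offset; a
/// following write or seek must first move the real cursor back to 8192.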
80 | fn poll_reposition(&mut self, cx: &mut Context<'_>) -> Poll<Result<()>> { 81 | if let Some(Ok(read_pos)) = self.read_pos { 82 | ready!(Pin::new(self.unblock.get_mut()).poll_seek(cx, SeekFrom::Start(read_pos)))?; 83 | } 84 | self.read_pos = None; 85 | Poll::Ready(Ok(())) 86 | } 87 | } 88 | 89 | impl futures::AsyncRead for AsyncFile { 90 | fn poll_read( 91 | mut self: Pin<&mut Self>, 92 | cx: &mut Context<'_>, 93 | buf: &mut [u8], 94 | ) -> Poll<Result<usize>> { 95 | // Before reading begins, remember the current cursor position. 96 | if self.read_pos.is_none() { 97 | // Initialize the logical cursor to the current position in the file. 98 | self.read_pos = Some(ready!(self.as_mut().poll_seek(cx, SeekFrom::Current(0)))); 99 | } 100 | 101 | let n = ready!(Pin::new(self.unblock.get_mut()).poll_read(cx, buf))?; 102 | 103 | // Update the logical cursor if the file is seekable. 104 | if let Some(Ok(pos)) = self.read_pos.as_mut() { 105 | *pos += n as u64; 106 | } 107 | 108 | Poll::Ready(Ok(n)) 109 | } 110 | } 111 | 112 | impl futures::AsyncSeek for AsyncFile { 113 | fn poll_seek( 114 | mut self: Pin<&mut Self>, 115 | cx: &mut Context<'_>, 116 | pos: SeekFrom, 117 | ) -> Poll<Result<u64>> { 118 | ready!(self.poll_reposition(cx))?; 119 | Pin::new(self.unblock.get_mut()).poll_seek(cx, pos) 120 | } 121 | } 122 | 123 | impl futures::AsyncWrite for AsyncFile { 124 | fn poll_write( 125 | mut self: Pin<&mut Self>, 126 | cx: &mut Context<'_>, 127 | buf: &[u8], 128 | ) -> Poll<Result<usize>> { 129 | ready!(self.poll_reposition(cx))?; 130 | self.is_dirty = true; 131 | Pin::new(self.unblock.get_mut()).poll_write(cx, buf) 132 | } 133 | 134 | fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<()>> { 135 | if self.is_dirty { 136 | ready!(Pin::new(self.unblock.get_mut()).poll_flush(cx))?; 137 | self.is_dirty = false; 138 | } 139 | Poll::Ready(Ok(())) 140 | } 141 | 142 | fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<()>> { 143 | Pin::new(self.unblock.get_mut()).poll_close(cx) 144 | } 145 | } 146 | 147 | #[cfg(test)] 148 | mod tests { 149 | use futures::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; 150 | 151 | use super::*; 152 | use crate::client::ClientBuilder; 153 | 154 | #[tokio::test] 155 | async fn test_file_build() { 156 | let _ = env_logger::try_init(); 157 | 158 | let fs = ClientBuilder::new("default") 159 | .connect() 160 | .expect("init success"); 161 | 162 | let path = uuid::Uuid::new_v4().to_string(); 163 | 164 | let _ = fs 165 | .open_file() 166 | .create(true) 167 | .write(true) 168 | .async_open(&format!("/tmp/{path}")) 169 | .await 170 | .expect("open file success"); 171 | } 172 | 173 | #[tokio::test] 174 | async fn test_file_write() { 175 | let _ = env_logger::try_init(); 176 | 177 | let fs = ClientBuilder::new("default") 178 | .connect() 179 | .expect("init success"); 180 | 181 | let path = uuid::Uuid::new_v4().to_string(); 182 | 183 | let mut f = fs 184 | .open_file() 185 | .create(true) 186 | .write(true) 187 | .async_open(&format!("/tmp/{path}")) 188 | .await 189 | .expect("open file success"); 190 | 191 | let n = f 192 | .write("Hello, World!".as_bytes()) 193 | .await 194 | .expect("write must success"); 195 | assert_eq!(n, 13) 196 | } 197 | 198 | #[tokio::test] 199 | async fn test_file_read() { 200 | let _ = env_logger::try_init(); 201 | 202 | let fs = ClientBuilder::new("default") 203 | .connect() 204 | .expect("init success"); 205 | 206 | let path = uuid::Uuid::new_v4().to_string(); 207 | 208 | { 209 | let mut f = fs 210 | .open_file() 211 | .create(true) 212 | .write(true)
213 | .async_open(&format!("/tmp/{path}")) 214 | .await 215 | .expect("open file success"); 216 | 217 | f.write_all("Hello, World!".as_bytes()) 218 | .await 219 | .expect("write must success"); 220 | f.close().await.expect("close must success"); 221 | } 222 | 223 | let mut f = fs 224 | .open_file() 225 | .read(true) 226 | .async_open(&format!("/tmp/{path}")) 227 | .await 228 | .expect("open file success"); 229 | 230 | let _ = f.seek(SeekFrom::Start(0)).await.expect("seek must success"); 231 | let mut s = String::new(); 232 | let n = f.read_to_string(&mut s).await.expect("read must succeed"); 233 | assert_eq!(n, 13); 234 | assert_eq!(s, "Hello, World!"); 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /src/file.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom, Write}; 2 | use std::ptr; 3 | 4 | use hdfs_sys::*; 5 | use libc::c_void; 6 | use log::debug; 7 | 8 | use crate::Client; 9 | 10 | // at most 2^30 bytes, ~1GB 11 | const FILE_LIMIT: usize = 1073741824; 12 | 13 | /// File will hold the underlying pointer to `hdfsFile`. 14 | /// 15 | /// The internal file will be closed on `Drop`, so there is no need to close it manually. 16 | /// 17 | /// # Examples 18 | /// 19 | /// ```no_run 20 | /// use hdrs::{Client, ClientBuilder}; 21 | /// 22 | /// let fs = ClientBuilder::new("default") 23 | ///     .with_user("default") 24 | ///     .with_kerberos_ticket_cache_path("/tmp/krb5_111") 25 | ///     .connect() 26 | ///     .expect("client connect succeed"); 27 | /// let mut f = fs 28 | ///     .open_file() 29 | ///     .read(true) 30 | ///     .open("/tmp/hello.txt") 31 | ///     .expect("must open success"); 32 | /// ``` 33 | #[derive(Debug)] 34 | pub struct File { 35 | fs: hdfsFS, 36 | f: hdfsFile, 37 | path: String, 38 | } 39 | 40 | /// HDFS's client handle is thread safe. 41 | unsafe impl Send for File {} 42 | unsafe impl Sync for File {} 43 | 44 | impl Drop for File { 45 | fn drop(&mut self) { 46 | unsafe { 47 | debug!("closing file"); 48 | let _ = hdfsCloseFile(self.fs, self.f); 49 | // hdfsCloseFile will free self.f whether it succeeds or fails. 50 | self.f = ptr::null_mut(); 51 | } 52 | } 53 | } 54 | 55 | impl File { 56 | pub(crate) fn new(fs: hdfsFS, f: hdfsFile, path: &str) -> Self { 57 | File { 58 | fs, 59 | f, 60 | path: path.to_string(), 61 | } 62 | } 63 | 64 | /// Works only for files opened in read-only mode.
65 | fn inner_seek(&self, offset: i64) -> Result<()> { 66 | let n = unsafe { hdfsSeek(self.fs, self.f, offset) }; 67 | 68 | if n == -1 { 69 | return Err(Error::last_os_error()); 70 | } 71 | 72 | Ok(()) 73 | } 74 | 75 | fn tell(&self) -> Result<i64> { 76 | let n = unsafe { hdfsTell(self.fs, self.f) }; 77 | 78 | if n == -1 { 79 | return Err(Error::last_os_error()); 80 | } 81 | 82 | Ok(n) 83 | } 84 | 85 | pub fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize> { 86 | let n = unsafe { 87 | hdfsPread( 88 | self.fs, 89 | self.f, 90 | offset as i64, 91 | buf.as_ptr() as *mut c_void, 92 | buf.len().min(FILE_LIMIT) as i32, 93 | ) 94 | }; 95 | 96 | if n == -1 { 97 | return Err(Error::last_os_error()); 98 | } 99 | 100 | Ok(n as usize) 101 | } 102 | } 103 | 104 | impl Read for File { 105 | fn read(&mut self, buf: &mut [u8]) -> Result<usize> { 106 | let n = unsafe { 107 | hdfsRead( 108 | self.fs, 109 | self.f, 110 | buf.as_ptr() as *mut c_void, 111 | buf.len().min(FILE_LIMIT) as i32, 112 | ) 113 | }; 114 | 115 | if n == -1 { 116 | return Err(Error::last_os_error()); 117 | } 118 | 119 | Ok(n as usize) 120 | } 121 | } 122 | 123 | impl Seek for File { 124 | fn seek(&mut self, pos: SeekFrom) -> Result<u64> { 125 | match pos { 126 | SeekFrom::Start(n) => { 127 | self.inner_seek(n as i64)?; 128 | Ok(n) 129 | } 130 | SeekFrom::Current(n) => { 131 | let current = self.tell()?; 132 | let offset = (current + n) as u64; 133 | self.inner_seek(offset as i64)?; 134 | Ok(offset) 135 | } 136 | SeekFrom::End(n) => { 137 | let meta = Client::new(self.fs).metadata(&self.path)?; 138 | let offset = meta.len() as i64 + n; 139 | self.inner_seek(offset)?; 140 | Ok(offset as u64) 141 | } 142 | } 143 | } 144 | } 145 | 146 | impl Write for File { 147 | fn write(&mut self, buf: &[u8]) -> Result<usize> { 148 | let n = unsafe { 149 | hdfsWrite( 150 | self.fs, 151 | self.f, 152 | buf.as_ptr() as *const c_void, 153 | buf.len().min(FILE_LIMIT) as i32, 154 | ) 155 | }; 156 | 157 | if n == -1 { 158 | return Err(Error::last_os_error()); 159 | } 160 | 161 | Ok(n as usize) 162 | } 163 | 164 | fn flush(&mut self) -> Result<()> { 165 | let n = unsafe { hdfsFlush(self.fs, self.f) }; 166 | 167 | if n == -1 { 168 | return Err(Error::last_os_error()); 169 | } 170 | 171 | Ok(()) 172 | } 173 | } 174 | 175 | impl Read for &File { 176 | fn read(&mut self, buf: &mut [u8]) -> Result<usize> { 177 | let n = unsafe { 178 | hdfsRead( 179 | self.fs, 180 | self.f, 181 | buf.as_ptr() as *mut c_void, 182 | buf.len().min(FILE_LIMIT) as i32, 183 | ) 184 | }; 185 | 186 | if n == -1 { 187 | return Err(Error::last_os_error()); 188 | } 189 | 190 | Ok(n as usize) 191 | } 192 | } 193 | 194 | impl Seek for &File { 195 | fn seek(&mut self, pos: SeekFrom) -> Result<u64> { 196 | match pos { 197 | SeekFrom::Start(n) => { 198 | self.inner_seek(n as i64)?; 199 | Ok(n) 200 | } 201 | SeekFrom::Current(n) => { 202 | let current = self.tell()?; 203 | let offset = (current + n) as u64; 204 | self.inner_seek(offset as i64)?; 205 | Ok(offset) 206 | } 207 | SeekFrom::End(_) => Err(Error::new( 208 | ErrorKind::Unsupported, 209 | "hdfs doesn't support seek from end", 210 | )), 211 | } 212 | } 213 | } 214 | 215 | impl Write for &File { 216 | fn write(&mut self, buf: &[u8]) -> Result<usize> { 217 | let n = unsafe { 218 | hdfsWrite( 219 | self.fs, 220 | self.f, 221 | buf.as_ptr() as *const c_void, 222 | buf.len().min(FILE_LIMIT) as i32, 223 | ) 224 | }; 225 | 226 | if n == -1 { 227 | return Err(Error::last_os_error()); 228 | } 229 | 230 | Ok(n as usize) 231 | } 232 | 233 | fn flush(&mut self) -> Result<()> { 234 | let n =
unsafe { hdfsFlush(self.fs, self.f) }; 235 | 236 | if n == -1 { 237 | return Err(Error::last_os_error()); 238 | } 239 | 240 | Ok(()) 241 | } 242 | } 243 | 244 | #[cfg(test)] 245 | mod tests { 246 | use super::*; 247 | use crate::client::ClientBuilder; 248 | 249 | #[test] 250 | fn test_file_build() { 251 | let _ = env_logger::try_init(); 252 | 253 | let fs = ClientBuilder::new("default") 254 | .connect() 255 | .expect("init success"); 256 | 257 | let path = uuid::Uuid::new_v4().to_string(); 258 | 259 | let f = fs 260 | .open_file() 261 | .create(true) 262 | .write(true) 263 | .open(&format!("/tmp/{path}")) 264 | .expect("open file success"); 265 | 266 | assert!(!f.f.is_null()); 267 | assert!(!f.fs.is_null()); 268 | } 269 | 270 | #[test] 271 | fn test_file_write() { 272 | let _ = env_logger::try_init(); 273 | 274 | let fs = ClientBuilder::new("default") 275 | .connect() 276 | .expect("init success"); 277 | 278 | let path = uuid::Uuid::new_v4().to_string(); 279 | 280 | let mut f = fs 281 | .open_file() 282 | .create(true) 283 | .write(true) 284 | .open(&format!("/tmp/{path}")) 285 | .expect("open file success"); 286 | 287 | let n = f 288 | .write("Hello, World!".as_bytes()) 289 | .expect("write must success"); 290 | assert_eq!(n, 13) 291 | } 292 | 293 | #[test] 294 | fn test_file_read() { 295 | let _ = env_logger::try_init(); 296 | 297 | let fs = ClientBuilder::new("default") 298 | .connect() 299 | .expect("init success"); 300 | 301 | let path = uuid::Uuid::new_v4().to_string(); 302 | 303 | let mut f = fs 304 | .open_file() 305 | .create(true) 306 | .write(true) 307 | .open(&format!("/tmp/{path}")) 308 | .expect("open file success"); 309 | 310 | let n = f 311 | .write("Hello, World!".as_bytes()) 312 | .expect("write must success"); 313 | assert_eq!(n, 13) 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /src/metadata.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CStr; 2 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; 3 | 4 | use hdfs_sys::*; 5 | 6 | /// Metadata of a path. 7 | #[derive(Debug, Clone, PartialEq, Eq)] 8 | pub struct Metadata { 9 | /// the name of the file, like `file:/path/to/file` 10 | path: String, 11 | /// the size of the file in bytes 12 | size: i64, 13 | /// file or directory 14 | kind: u32, 15 | /// the permissions associated with the file 16 | permissions: i16, 17 | /// the count of replicas 18 | replication: i16, 19 | /// the block size for the file 20 | block_size: i64, 21 | /// the owner of the file 22 | owner: String, 23 | /// the group associated with the file 24 | group: String, 25 | /// the last modification time for the file in seconds 26 | last_mod: i64, 27 | /// the last access time for the file in seconds 28 | last_access: i64, 29 | } 30 | 31 | impl Metadata { 32 | /// the path of the file, like `/path/to/file` 33 | /// 34 | /// # Notes 35 | /// 36 | /// Hadoop has [restrictions](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/filesystem/introduction.html) of path name: 37 | /// 38 | /// - A Path is comprised of Path elements separated by "/". 39 | /// - A path element is a unicode string of 1 or more characters. 40 | /// - Path element MUST NOT include the characters ":" or "/". 41 | /// - Path element SHOULD NOT include characters of ASCII/UTF-8 value 0-31 . 42 | /// - Path element MUST NOT be "." or ".." 43 | /// - Note also that the Azure blob store documents say that paths SHOULD NOT use a trailing "." 
(as their .NET URI class strips it). 44 | /// - Paths are compared based on unicode code-points. 45 | /// - Case-insensitive and locale-specific comparisons MUST NOT be used. 46 | pub fn path(&self) -> &str { 47 | &self.path 48 | } 49 | 50 | /// the size of the file in bytes 51 | /// 52 | /// Metadata is not a collection, so we will not provide `is_empty`. 53 | /// Keep the same style as `std::fs::File`. 54 | #[allow(clippy::len_without_is_empty)] 55 | pub fn len(&self) -> u64 { 56 | self.size as u64 57 | } 58 | 59 | /// Returns `true` if the path is a directory. 60 | pub fn is_dir(&self) -> bool { 61 | self.kind == tObjectKind_kObjectKindDirectory 62 | } 63 | 64 | /// Returns `true` if the path is a regular file. 65 | pub fn is_file(&self) -> bool { 66 | self.kind == tObjectKind_kObjectKindFile 67 | } 68 | 69 | /// the permissions associated with the file 70 | pub fn permissions(&self) -> i16 { 71 | self.permissions 72 | } 73 | 74 | /// the count of replicas 75 | pub fn replication(&self) -> i16 { 76 | self.replication 77 | } 78 | 79 | /// the block size for the file 80 | pub fn block_size(&self) -> i64 { 81 | self.block_size 82 | } 83 | 84 | /// the owner of the file 85 | pub fn owner(&self) -> &str { 86 | &self.owner 87 | } 88 | 89 | /// the group associated with the file 90 | pub fn group(&self) -> &str { 91 | &self.group 92 | } 93 | 94 | /// the last modification time for the file in seconds 95 | pub fn modified(&self) -> SystemTime { 96 | UNIX_EPOCH 97 | .checked_add(Duration::from_secs(self.last_mod as u64)) 98 | .expect("must be valid SystemTime") 99 | } 100 | 101 | /// the last access time for the file in seconds 102 | pub fn accessed(&self) -> SystemTime { 103 | UNIX_EPOCH 104 | .checked_add(Duration::from_secs(self.last_access as u64)) 105 | .expect("must be valid SystemTime") 106 | } 107 | } 108 | 109 | impl From<hdfsFileInfo> for Metadata { 110 | fn from(hfi: hdfsFileInfo) -> Self { 111 | Self { 112 | path: { 113 | let p = unsafe { 114 | CStr::from_ptr(hfi.mName) 115 | .to_str() 116 | .expect("hdfs path must be valid utf-8") 117 | }; 118 | 119 | match p.find(':') { 120 | None => p.to_string(), 121 | Some(idx) => match &p[..idx] { 122 | // `file:/path/to/file` => `/path/to/file` 123 | "file" => p[idx + 1..].to_string(), 124 | // `hdfs://127.0.0.1:9000/path/to/file` => `/path/to/file` 125 | _ => { 126 | // length of `hdfs://` 127 | let scheme = idx + 2; 128 | // the first occurrence of `/` in `127.0.0.1:9000/path/to/file` 129 | let endpoint = p[scheme + 1..]
130 | .find('/') 131 | .expect("hdfs must returns an absolute path"); 132 | p[scheme + endpoint + 1..].to_string() 133 | } 134 | }, 135 | } 136 | }, 137 | size: hfi.mSize, 138 | kind: hfi.mKind, 139 | permissions: hfi.mPermissions, 140 | replication: hfi.mReplication, 141 | block_size: hfi.mBlockSize, 142 | owner: unsafe { 143 | CStr::from_ptr(hfi.mOwner) 144 | .to_str() 145 | .expect("hdfs owner must be valid utf-8") 146 | .into() 147 | }, 148 | group: unsafe { 149 | CStr::from_ptr(hfi.mGroup) 150 | .to_str() 151 | .expect("hdfs owner must be valid utf-8") 152 | .into() 153 | }, 154 | last_mod: hfi.mLastMod, 155 | last_access: hfi.mLastAccess, 156 | } 157 | } 158 | } 159 | 160 | #[cfg(test)] 161 | mod tests { 162 | use std::ffi::CString; 163 | 164 | use super::*; 165 | 166 | #[test] 167 | fn test_from_hdfs_file_info() -> anyhow::Result<()> { 168 | let cases = vec![ 169 | ( 170 | hdfsFileInfo { 171 | mKind: 0, 172 | mName: CString::new("file:/path/to/file")?.into_raw(), 173 | mLastMod: 0, 174 | mSize: 123, 175 | mReplication: 0, 176 | mBlockSize: 0, 177 | mOwner: CString::new("xuanwo")?.into_raw(), 178 | mGroup: CString::new("xuanwo")?.into_raw(), 179 | mPermissions: 0, 180 | mLastAccess: 0, 181 | }, 182 | Metadata { 183 | path: "/path/to/file".into(), 184 | size: 123, 185 | kind: 0, 186 | permissions: 0, 187 | replication: 0, 188 | block_size: 0, 189 | owner: "xuanwo".into(), 190 | group: "xuanwo".into(), 191 | last_mod: 0, 192 | last_access: 0, 193 | }, 194 | ), 195 | ( 196 | hdfsFileInfo { 197 | mKind: 0, 198 | mName: CString::new("hdfs://127.0.0.1:9000/path/to/file")?.into_raw(), 199 | mLastMod: 455, 200 | mSize: 0, 201 | mReplication: 0, 202 | mBlockSize: 0, 203 | mOwner: CString::new("xuanwo")?.into_raw(), 204 | mGroup: CString::new("xuanwo")?.into_raw(), 205 | mPermissions: 0, 206 | mLastAccess: 0, 207 | }, 208 | Metadata { 209 | path: "/path/to/file".into(), 210 | size: 0, 211 | kind: 0, 212 | permissions: 0, 213 | replication: 0, 214 | block_size: 0, 215 | owner: "xuanwo".into(), 216 | group: "xuanwo".into(), 217 | last_mod: 455, 218 | last_access: 0, 219 | }, 220 | ), 221 | ( 222 | hdfsFileInfo { 223 | mKind: 0, 224 | mName: CString::new("/path/to/file")?.into_raw(), 225 | mLastMod: 455, 226 | mSize: 0, 227 | mReplication: 0, 228 | mBlockSize: 0, 229 | mOwner: CString::new("xuanwo")?.into_raw(), 230 | mGroup: CString::new("xuanwo")?.into_raw(), 231 | mPermissions: 0, 232 | mLastAccess: 0, 233 | }, 234 | Metadata { 235 | path: "/path/to/file".into(), 236 | size: 0, 237 | kind: 0, 238 | permissions: 0, 239 | replication: 0, 240 | block_size: 0, 241 | owner: "xuanwo".into(), 242 | group: "xuanwo".into(), 243 | last_mod: 455, 244 | last_access: 0, 245 | }, 246 | ), 247 | ]; 248 | 249 | for case in cases { 250 | let meta = Metadata::from(case.0); 251 | 252 | assert_eq!(meta, case.1); 253 | } 254 | 255 | Ok(()) 256 | } 257 | } 258 | -------------------------------------------------------------------------------- /tests/main.rs: -------------------------------------------------------------------------------- 1 | use std::{env, io}; 2 | 3 | use anyhow::Result; 4 | use hdrs::ClientBuilder; 5 | use log::debug; 6 | use rand::{Rng, RngCore}; 7 | 8 | #[test] 9 | fn test_connect() -> Result<()> { 10 | dotenv::from_filename(".env").ok(); 11 | 12 | let name_node = env::var("HDRS_NAMENODE")?; 13 | 14 | let fs = ClientBuilder::new(&name_node).connect(); 15 | assert!(fs.is_ok()); 16 | 17 | Ok(()) 18 | } 19 | 20 | #[test] 21 | fn test_mkdir() -> Result<()> { 22 | let _ = env_logger::try_init(); 23 | 
dotenv::from_filename(".env").ok(); 24 | 25 | if env::var("HDRS_TEST").unwrap_or_default() != "on" { 26 | return Ok(()); 27 | } 28 | 29 | let name_node = env::var("HDRS_NAMENODE")?; 30 | let work_dir = env::var("HDRS_WORKDIR").unwrap_or_default(); 31 | 32 | let fs = ClientBuilder::new(&name_node).connect()?; 33 | 34 | let path = format!("{work_dir}{}", uuid::Uuid::new_v4()); 35 | 36 | fs.create_dir(&path).expect("mkdir should succeed"); 37 | fs.remove_dir(&path).expect("rmdir should succeed"); 38 | 39 | Ok(()) 40 | } 41 | 42 | #[test] 43 | fn test_read_dir() -> Result<()> { 44 | let _ = env_logger::try_init(); 45 | dotenv::from_filename(".env").ok(); 46 | 47 | if env::var("HDRS_TEST").unwrap_or_default() != "on" { 48 | return Ok(()); 49 | } 50 | 51 | let name_node = env::var("HDRS_NAMENODE")?; 52 | let work_dir = env::var("HDRS_WORKDIR").unwrap_or_default(); 53 | 54 | let fs = ClientBuilder::new(&name_node).connect()?; 55 | 56 | let path = format!("{work_dir}{}", uuid::Uuid::new_v4()); 57 | 58 | fs.create_dir(&path).expect("mkdir should succeed"); 59 | debug!("read dir {}", path); 60 | let readdir = fs.read_dir(&path).expect("readdir should succeed"); 61 | debug!("readdir: {:?}", readdir); 62 | assert_eq!(readdir.len(), 0); 63 | 64 | Ok(()) 65 | } 66 | 67 | #[test] 68 | fn test_rename() -> Result<()> { 69 | use std::io::{Read, Write}; 70 | 71 | dotenv::from_filename(".env").ok(); 72 | 73 | let name_node = env::var("HDRS_NAMENODE")?; 74 | let work_dir = env::var("HDRS_WORKDIR").unwrap_or_default(); 75 | 76 | let fs = ClientBuilder::new(&name_node).connect()?; 77 | 78 | let path = format!("{work_dir}{}", uuid::Uuid::new_v4()); 79 | { 80 | let mut f = fs.open_file().create(true).write(true).open(&path)?; 81 | f.write_all(b"test file content")?; 82 | f.flush()?; 83 | } 84 | let new_path = format!("{work_dir}{}", uuid::Uuid::new_v4()); 85 | fs.rename_file(&path, &new_path)?; 86 | 87 | { 88 | let maybe_metadata = fs.metadata(&path); 89 | assert!(maybe_metadata.is_err()); 90 | let err = maybe_metadata.unwrap_err(); 91 | assert_eq!(err.kind(), io::ErrorKind::NotFound); 92 | } 93 | { 94 | let maybe_metadata = fs.metadata(&new_path); 95 | assert!(maybe_metadata.is_ok()); 96 | let metadata = maybe_metadata.unwrap(); 97 | assert!(metadata.is_file()); 98 | } 99 | { 100 | let mut f = fs.open_file().read(true).open(&new_path)?; 101 | let mut content = String::new(); 102 | f.read_to_string(&mut content)?; 103 | assert_eq!(content.as_str(), "test file content"); 104 | } 105 | 106 | Ok(()) 107 | } 108 | 109 | #[test] 110 | fn test_file() -> Result<()> { 111 | use std::io::{Read, Seek, SeekFrom, Write}; 112 | 113 | let _ = env_logger::try_init(); 114 | dotenv::from_filename(".env").ok(); 115 | 116 | if env::var("HDRS_TEST").unwrap_or_default() != "on" { 117 | return Ok(()); 118 | } 119 | 120 | let name_node = env::var("HDRS_NAMENODE")?; 121 | let work_dir = env::var("HDRS_WORKDIR").unwrap_or_default(); 122 | 123 | let fs = ClientBuilder::new(&name_node).connect()?; 124 | 125 | let path = format!("{work_dir}{}", uuid::Uuid::new_v4()); 126 | 127 | let mut rng = rand::thread_rng(); 128 | let mut content = vec![0; rng.gen_range(1024..4 * 1024 * 1024)]; 129 | rng.fill_bytes(&mut content); 130 | 131 | { 132 | // Write file 133 | debug!("test file write"); 134 | let mut f = fs.open_file().create(true).write(true).open(&path)?; 135 | f.write_all(&content)?; 136 | // Flush file 137 | debug!("test file flush"); 138 | f.flush()?; 139 | } 140 | 141 | { 142 | // Read file 143 | debug!("test file read"); 144 | let mut f = 
fs.open_file().read(true).open(&path)?; 145 | let mut buf = Vec::new(); 146 | let n = f.read_to_end(&mut buf)?; 147 | assert_eq!(n, content.len()); 148 | assert_eq!(buf, content); 149 | } 150 | 151 | { 152 | // Read not exist file 153 | debug!("test not exist file read"); 154 | let f = fs 155 | .open_file() 156 | .read(true) 157 | .open(&format!("{work_dir}{}", uuid::Uuid::new_v4())); 158 | assert!(f.is_err()); 159 | assert_eq!(f.unwrap_err().kind(), io::ErrorKind::NotFound) 160 | } 161 | 162 | { 163 | // Stat file. 164 | debug!("test file stat"); 165 | let fi = fs.metadata(&path)?; 166 | assert!(fi.is_file()); 167 | assert_eq!(&path, fi.path()); 168 | assert_eq!(fi.len(), content.len() as u64); 169 | } 170 | 171 | { 172 | // Seek file. 173 | debug!("test file seek"); 174 | let mut f = fs.open_file().read(true).open(&path)?; 175 | let offset = content.len() / 2; 176 | let size = content.len() - offset; 177 | let mut buf = Vec::new(); 178 | let _ = f.seek(SeekFrom::Start(offset as u64))?; 179 | let n = f.read_to_end(&mut buf)?; 180 | assert_eq!(n, size); 181 | assert_eq!(buf, content[offset..]); 182 | } 183 | 184 | { 185 | // Remove file 186 | debug!("test file remove"); 187 | let result = fs.remove_file(&path); 188 | assert!(result.is_ok()); 189 | } 190 | 191 | { 192 | // Stat it again, we should get a NotFound. 193 | debug!("test file stat again"); 194 | let fi = fs.metadata(&path); 195 | assert!(fi.is_err()); 196 | assert_eq!(fi.unwrap_err().kind(), io::ErrorKind::NotFound); 197 | } 198 | 199 | Ok(()) 200 | } 201 | 202 | #[cfg(feature = "async_file")] 203 | #[tokio::test] 204 | async fn test_tokio_file() -> Result<()> { 205 | use futures::io::*; 206 | 207 | let _ = env_logger::try_init(); 208 | dotenv::from_filename(".env").ok(); 209 | 210 | if env::var("HDRS_TEST").unwrap_or_default() != "on" { 211 | return Ok(()); 212 | } 213 | 214 | let name_node = env::var("HDRS_NAMENODE")?; 215 | let work_dir = env::var("HDRS_WORKDIR").unwrap_or_default(); 216 | 217 | let fs = ClientBuilder::new(&name_node).connect()?; 218 | 219 | let path = format!("{work_dir}{}", uuid::Uuid::new_v4()); 220 | 221 | let mut rng = rand::thread_rng(); 222 | let mut content = vec![0; rng.gen_range(1024..4 * 1024 * 1024)]; 223 | rng.fill_bytes(&mut content); 224 | 225 | { 226 | // Write file 227 | debug!("test file write"); 228 | let mut f = fs 229 | .open_file() 230 | .create(true) 231 | .write(true) 232 | .async_open(&path) 233 | .await?; 234 | f.write_all(&content).await?; 235 | // Flush file 236 | debug!("test file flush"); 237 | f.flush().await?; 238 | } 239 | 240 | { 241 | // Read file 242 | debug!("test file read"); 243 | let mut f = fs.open_file().read(true).async_open(&path).await?; 244 | let mut buf = Vec::new(); 245 | let n = f.read_to_end(&mut buf).await?; 246 | assert_eq!(n, content.len()); 247 | assert_eq!(buf, content); 248 | } 249 | 250 | { 251 | // Stat file. 252 | debug!("test file stat"); 253 | let fi = fs.metadata(&path)?; 254 | assert!(fi.is_file()); 255 | assert_eq!(&path, fi.path()); 256 | assert_eq!(fi.len(), content.len() as u64); 257 | } 258 | 259 | { 260 | // Seek file. 
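// Seek to the midpoint of the file, then check that reading to EOF returns exactly the second half of what was written.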
261 | debug!("test file seek"); 262 | let mut f = fs.open_file().read(true).async_open(&path).await?; 263 | let offset = content.len() / 2; 264 | let size = content.len() - offset; 265 | let mut buf = Vec::new(); 266 | let _ = f.seek(SeekFrom::Start(offset as u64)).await?; 267 | let n = f.read_to_end(&mut buf).await?; 268 | assert_eq!(n, size); 269 | assert_eq!(buf, content[offset..]); 270 | } 271 | 272 | { 273 | // Remove file 274 | debug!("test file remove"); 275 | let result = fs.remove_file(&path); 276 | assert!(result.is_ok()); 277 | } 278 | 279 | { 280 | // Stat it again, we should get a NotFound. 281 | debug!("test file stat again"); 282 | let fi = fs.metadata(&path); 283 | assert!(fi.is_err()); 284 | assert_eq!(fi.unwrap_err().kind(), io::ErrorKind::NotFound); 285 | } 286 | 287 | Ok(()) 288 | } 289 | 290 | #[cfg(feature = "async_file")] 291 | #[tokio::test] 292 | async fn test_futures_file() -> Result<()> { 293 | use futures::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; 294 | use tokio::io::*; 295 | 296 | let _ = env_logger::try_init(); 297 | dotenv::from_filename(".env").ok(); 298 | 299 | if env::var("HDRS_TEST").unwrap_or_default() != "on" { 300 | return Ok(()); 301 | } 302 | 303 | let name_node = env::var("HDRS_NAMENODE")?; 304 | let work_dir = env::var("HDRS_WORKDIR").unwrap_or_default(); 305 | 306 | let fs = ClientBuilder::new(&name_node).connect()?; 307 | 308 | let path = format!("{work_dir}{}", uuid::Uuid::new_v4()); 309 | 310 | let mut rng = rand::thread_rng(); 311 | let mut content = vec![0; rng.gen_range(1024..4 * 1024 * 1024)]; 312 | rng.fill_bytes(&mut content); 313 | 314 | { 315 | // Write file 316 | debug!("test file write"); 317 | let mut f = fs 318 | .open_file() 319 | .create(true) 320 | .write(true) 321 | .async_open(&path) 322 | .await?; 323 | f.write_all(&content).await?; 324 | // Flush file 325 | debug!("test file flush"); 326 | f.flush().await?; 327 | } 328 | 329 | { 330 | // Read file 331 | debug!("test file read"); 332 | let mut f = fs.open_file().read(true).async_open(&path).await?; 333 | let mut buf = Vec::new(); 334 | let n = f.read_to_end(&mut buf).await?; 335 | assert_eq!(n, content.len()); 336 | assert_eq!(buf, content); 337 | } 338 | 339 | { 340 | // Stat file. 341 | debug!("test file stat"); 342 | let fi = fs.metadata(&path)?; 343 | assert!(fi.is_file()); 344 | assert_eq!(&path, fi.path()); 345 | assert_eq!(fi.len(), content.len() as u64); 346 | } 347 | 348 | { 349 | // Seek file. 350 | debug!("test file seek"); 351 | let mut f = fs.open_file().read(true).async_open(&path).await?; 352 | let offset = content.len() / 2; 353 | let size = content.len() - offset; 354 | let mut buf = Vec::new(); 355 | let _ = f.seek(SeekFrom::Start(offset as u64)).await?; 356 | let n = f.read_to_end(&mut buf).await?; 357 | assert_eq!(n, size); 358 | assert_eq!(buf, content[offset..]); 359 | } 360 | 361 | { 362 | // Remove file 363 | debug!("test file remove"); 364 | let result = fs.remove_file(&path); 365 | assert!(result.is_ok()); 366 | } 367 | 368 | { 369 | // Stat it again, we should get a NotFound. 
370 | debug!("test file stat again"); 371 | let fi = fs.metadata(&path); 372 | assert!(fi.is_err()); 373 | assert_eq!(fi.unwrap_err().kind(), io::ErrorKind::NotFound); 374 | } 375 | 376 | Ok(()) 377 | } 378 | 379 | #[test] 380 | fn test_client_with_user() -> Result<()> { 381 | let _ = env_logger::try_init(); 382 | 383 | dotenv::from_filename(".env").ok(); 384 | if std::env::var("HDRS_INTEGRATED_TEST").unwrap_or_default() != "on" { 385 | return Ok(()); 386 | } 387 | let name_node = env::var("HDRS_NAMENODE")?; 388 | let work_dir = env::var("HDRS_WORKDIR").unwrap_or_default(); 389 | 390 | let fs = ClientBuilder::new(&name_node) 391 | .with_user("test_user") 392 | .connect()?; 393 | let test_dir = format!("{}/test_dir", work_dir); 394 | let _ = fs.create_dir(&test_dir); 395 | let meta = fs.metadata(&test_dir); 396 | assert!(meta.is_ok()); 397 | assert_eq!(meta.unwrap().owner(), "test_user"); 398 | 399 | Ok(()) 400 | } 401 | -------------------------------------------------------------------------------- /src/open_options.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::{c_int, c_short, CString}; 2 | use std::io::{Error, ErrorKind, Result}; 3 | 4 | use hdfs_sys::*; 5 | use log::debug; 6 | 7 | use crate::File; 8 | 9 | /// Options and flags which can be used to configure how a file is opened. 10 | /// 11 | /// This builder exposes the ability to configure how a [`File`] is opened and 12 | /// what operations are permitted on the open file. 13 | /// 14 | /// # Examples 15 | /// 16 | /// Opening a file to read: 17 | /// 18 | /// ```no_run 19 | /// use hdrs::{Client, ClientBuilder}; 20 | /// 21 | /// let fs = ClientBuilder::new("default") 22 | /// .with_user("default") 23 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 24 | /// .connect() 25 | /// .expect("client connect succeed"); 26 | /// let file = fs.open_file().read(true).open("foo.txt"); 27 | /// ``` 28 | /// 29 | /// Opening a file for both reading and writing, as well as creating it if it 30 | /// doesn't exist: 31 | /// 32 | /// ```no_run 33 | /// use hdrs::{Client, ClientBuilder}; 34 | /// 35 | /// let fs = ClientBuilder::new("default") 36 | /// .with_user("default") 37 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 38 | /// .connect() 39 | /// .expect("client connect succeed"); 40 | /// let file = fs 41 | /// .open_file() 42 | /// .read(true) 43 | /// .write(true) 44 | /// .create(true) 45 | /// .open("foo.txt"); 46 | /// ``` 47 | #[derive(Debug, Clone)] 48 | pub struct OpenOptions { 49 | fs: hdfsFS, 50 | 51 | read: bool, 52 | write: bool, 53 | append: bool, 54 | truncate: bool, 55 | create: bool, 56 | create_new: bool, 57 | buffer_size: usize, 58 | replication: usize, 59 | blocksize: usize, 60 | } 61 | 62 | /// HDFS's client handle is thread safe. 63 | unsafe impl Send for OpenOptions {} 64 | unsafe impl Sync for OpenOptions {} 65 | 66 | impl OpenOptions { 67 | pub(crate) fn new(fs: hdfsFS) -> Self { 68 | OpenOptions { 69 | fs, 70 | 71 | read: false, 72 | write: false, 73 | append: false, 74 | truncate: false, 75 | create: false, 76 | create_new: false, 77 | buffer_size: 0, 78 | replication: 0, 79 | blocksize: 0, 80 | } 81 | } 82 | 83 | /// Sets size of buffer for read/write. 84 | /// 85 | /// Pass `0` if you want to use the default configured values. 86 | /// 87 | /// `0` by default. 88 | pub fn with_buffer_size(&mut self, buffer_size: usize) -> &mut Self { 89 | self.buffer_size = buffer_size; 90 | self 91 | } 92 | 93 | /// Sets block replication. 
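/// (the number of datanodes that store a copy of each block of the file)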
94 | /// 95 | /// Pass `0` if you want to use the default configured values. 96 | /// 97 | /// `0` by default. 98 | pub fn with_replication(&mut self, replication: usize) -> &mut Self { 99 | self.replication = replication; 100 | self 101 | } 102 | 103 | /// Sets size of block. 104 | /// 105 | /// Pass `0` if you want to use the default configured values. 106 | /// 107 | /// `0` by default. 108 | pub fn with_blocksize(&mut self, blocksize: usize) -> &mut Self { 109 | self.blocksize = blocksize; 110 | self 111 | } 112 | 113 | /// Sets the option for read access. 114 | /// 115 | /// This option, when true, will indicate that the file should be 116 | /// `read`-able if opened. 117 | /// 118 | /// # Examples 119 | /// 120 | /// ```no_run 121 | /// use hdrs::{Client, ClientBuilder}; 122 | /// 123 | /// let fs = ClientBuilder::new("default") 124 | /// .with_user("default") 125 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 126 | /// .connect() 127 | /// .expect("client connect succeed"); 128 | /// let file = fs.open_file().read(true).open("foo.txt"); 129 | /// ``` 130 | pub fn read(&mut self, read: bool) -> &mut Self { 131 | self.read = read; 132 | self 133 | } 134 | 135 | /// Sets the option for write access. 136 | /// 137 | /// This option, when true, will indicate that the file should be 138 | /// `write`-able if opened. 139 | /// 140 | /// If the file already exists, any write calls on it will overwrite its 141 | /// contents, without truncating it. 142 | /// 143 | /// # Examples 144 | /// 145 | /// ```no_run 146 | /// use hdrs::{Client, ClientBuilder}; 147 | /// 148 | /// let fs = ClientBuilder::new("default") 149 | /// .with_user("default") 150 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 151 | /// .connect() 152 | /// .expect("client connect succeed"); 153 | /// let file = fs.open_file().write(true).open("foo.txt"); 154 | /// ``` 155 | pub fn write(&mut self, write: bool) -> &mut Self { 156 | self.write = write; 157 | self 158 | } 159 | 160 | /// Sets the option for the append mode. 161 | /// 162 | /// This option, when true, means that writes will append to a file instead 163 | /// of overwriting previous contents. 164 | /// Note that setting `.write(true).append(true)` has the same effect as 165 | /// setting only `.append(true)`. 166 | /// 167 | /// One maybe obvious note when using append-mode: make sure that all data 168 | /// that belongs together is written to the file in one operation. This 169 | /// can be done by concatenating strings before passing them to [`write()`], 170 | /// or using a buffered writer (with a buffer of adequate size), 171 | /// and calling [`flush()`] when the message is complete. 172 | /// 173 | /// If a file is opened with both read and append access, beware that after 174 | /// opening, and after every write, the position for reading may be set at the 175 | /// end of the file. So, before writing, save the current position (using 176 | /// [seek]\([SeekFrom]::[Current]\(0))), and restore it before the next read. 177 | /// 178 | /// ## Note 179 | /// 180 | /// This function doesn't create the file if it doesn't exist. Use the 181 | /// [`OpenOptions::create`] method to do so. 
182 | /// 183 | /// [`write()`]: Write::write "io::Write::write" 184 | /// [`flush()`]: Write::flush "io::Write::flush" 185 | /// [seek]: Seek::seek "io::Seek::seek" 186 | /// [Current]: SeekFrom::Current "io::SeekFrom::Current" 187 | /// 188 | /// # Examples 189 | /// 190 | /// ```no_run 191 | /// use hdrs::{Client, ClientBuilder}; 192 | /// 193 | /// let fs = ClientBuilder::new("default") 194 | /// .with_user("default") 195 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 196 | /// .connect() 197 | /// .expect("client connect succeed"); 198 | /// let file = fs.open_file().append(true).open("foo.txt"); 199 | /// ``` 200 | pub fn append(&mut self, append: bool) -> &mut Self { 201 | self.append = append; 202 | self 203 | } 204 | 205 | /// Sets the option for truncating a previous file. 206 | /// 207 | /// If a file is successfully opened with this option set it will truncate 208 | /// the file to 0 length if it already exists. 209 | /// 210 | /// The file must be opened with write access for truncate to work. 211 | /// 212 | /// # Examples 213 | /// 214 | /// ```no_run 215 | /// use hdrs::{Client, ClientBuilder}; 216 | /// 217 | /// let fs = ClientBuilder::new("default") 218 | /// .with_user("default") 219 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 220 | /// .connect() 221 | /// .expect("client connect succeed"); 222 | /// let file = fs.open_file().truncate(true).open("foo.txt"); 223 | /// ``` 224 | pub fn truncate(&mut self, truncate: bool) -> &mut Self { 225 | self.truncate = truncate; 226 | self 227 | } 228 | 229 | /// Sets the option to create a new file, or open it if it already exists. 230 | /// 231 | /// In order for the file to be created, [`OpenOptions::write`] or 232 | /// [`OpenOptions::append`] access must be used. 233 | /// 234 | /// # Examples 235 | /// 236 | /// ```no_run 237 | /// use hdrs::{Client, ClientBuilder}; 238 | /// 239 | /// let fs = ClientBuilder::new("default") 240 | /// .with_user("default") 241 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 242 | /// .connect() 243 | /// .expect("client connect succeed"); 244 | /// let file = fs.open_file().create(true).open("foo.txt"); 245 | /// ``` 246 | pub fn create(&mut self, create: bool) -> &mut Self { 247 | self.create = create; 248 | self 249 | } 250 | 251 | /// Sets the option to create a new file, failing if it already exists. 252 | /// 253 | /// No file is allowed to exist at the target location, also no (dangling) symlink. In this 254 | /// way, if the call succeeds, the file returned is guaranteed to be new. 255 | /// 256 | /// This option is useful because it is atomic. Otherwise between checking 257 | /// whether a file exists and creating a new one, the file may have been 258 | /// created by another process (a TOCTOU race condition / attack). 259 | /// 260 | /// If `.create_new(true)` is set, [`.create()`] and [`.truncate()`] are 261 | /// ignored. 262 | /// 263 | /// The file must be opened with write or append access in order to create 264 | /// a new file. 
265 | /// 266 | /// [`.create()`]: OpenOptions::create 267 | /// [`.truncate()`]: OpenOptions::truncate 268 | /// 269 | /// # Examples 270 | /// 271 | /// ```no_run 272 | /// use hdrs::{Client, ClientBuilder}; 273 | /// 274 | /// let fs = ClientBuilder::new("default") 275 | /// .with_user("default") 276 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 277 | /// .connect() 278 | /// .expect("client connect succeed"); 279 | /// let file = fs.open_file().write(true).create_new(true).open("foo.txt"); 280 | /// ``` 281 | pub fn create_new(&mut self, create_new: bool) -> &mut Self { 282 | self.create_new = create_new; 283 | self 284 | } 285 | 286 | /// Borrowed from rust-lang 287 | fn get_access_mode(&self) -> Result<c_int> { 288 | match (self.read, self.write, self.append) { 289 | (true, false, false) => Ok(libc::O_RDONLY), 290 | (false, true, false) => Ok(libc::O_WRONLY), 291 | (true, true, false) => Ok(libc::O_RDWR), 292 | (false, _, true) => Ok(libc::O_WRONLY | libc::O_APPEND), 293 | (true, _, true) => Ok(libc::O_RDWR | libc::O_APPEND), 294 | (false, false, false) => Err(Error::from_raw_os_error(libc::EINVAL)), 295 | } 296 | } 297 | 298 | /// Borrowed from rust-lang 299 | fn get_creation_mode(&self) -> Result<c_int> { 300 | match (self.write, self.append) { 301 | (true, false) => {} 302 | (false, false) => { 303 | if self.truncate || self.create || self.create_new { 304 | return Err(Error::from_raw_os_error(libc::EINVAL)); 305 | } 306 | } 307 | (_, true) => { 308 | if self.truncate && !self.create_new { 309 | return Err(Error::from_raw_os_error(libc::EINVAL)); 310 | } 311 | } 312 | } 313 | 314 | Ok(match (self.create, self.truncate, self.create_new) { 315 | (false, false, false) => 0, 316 | (true, false, false) => libc::O_CREAT, 317 | (false, true, false) => libc::O_TRUNC, 318 | (true, true, false) => libc::O_CREAT | libc::O_TRUNC, 319 | (_, _, true) => libc::O_CREAT | libc::O_EXCL, 320 | }) 321 | } 322 | 323 | /// Opens a file at `path` with the options specified by `self`. 324 | /// 325 | /// # Errors 326 | /// 327 | /// This function will return an error under a number of different 328 | /// circumstances. Some of these error conditions are listed here, together 329 | /// with their [`io::ErrorKind`]. The mapping to [`io::ErrorKind`]s is not 330 | /// part of the compatibility contract of the function. 331 | /// 332 | /// * [`NotFound`]: The specified file does not exist and neither `create` 333 | /// nor `create_new` is set. 334 | /// * [`NotFound`]: One of the directory components of the file path does 335 | /// not exist. 336 | /// * [`PermissionDenied`]: The user lacks permission to get the specified 337 | /// access rights for the file. 338 | /// * [`PermissionDenied`]: The user lacks permission to open one of the 339 | /// directory components of the specified path. 340 | /// * [`AlreadyExists`]: `create_new` was specified and the file already 341 | /// exists. 342 | /// * [`InvalidInput`]: Invalid combinations of open options (truncate 343 | /// without write access, no access mode set, incompatible integer values, 344 | /// etc.). 345 | /// 346 | /// The following errors don't match any existing [`io::ErrorKind`] at the moment: 347 | /// * One of the directory components of the specified file path 348 | /// was not, in fact, a directory.
349 | /// * Filesystem-level errors: full disk, write permission 350 | /// requested on a read-only file system, exceeded disk quota, too many 351 | /// open files, too long filename, too many symbolic links in the 352 | /// specified path (Unix-like systems only), etc. 353 | /// 354 | /// # Examples 355 | /// 356 | /// ```no_run 357 | /// use hdrs::{Client, ClientBuilder}; 358 | /// 359 | /// let fs = ClientBuilder::new("default") 360 | /// .with_user("default") 361 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 362 | /// .connect() 363 | /// .expect("client connect succeed"); 364 | /// let file = fs.open_file().write(true).open("foo.txt"); 365 | /// ``` 366 | /// 367 | /// [`AlreadyExists`]: io::ErrorKind::AlreadyExists 368 | /// [`InvalidInput`]: io::ErrorKind::InvalidInput 369 | /// [`NotFound`]: io::ErrorKind::NotFound 370 | /// [`PermissionDenied`]: io::ErrorKind::PermissionDenied 371 | pub fn open(&self, path: &str) -> Result<File> { 372 | let flags = libc::O_CLOEXEC | self.get_access_mode()? | self.get_creation_mode()?; 373 | 374 | debug!("open file {} with flags {}", path, flags); 375 | let b = unsafe { 376 | let p = CString::new(path)?; 377 | let buffer_size: c_int = self.buffer_size.try_into().map_err(|_| { 378 | Error::new( 379 | ErrorKind::InvalidInput, 380 | format!("`buffer_size` {} exceeds valid `c_int`", self.buffer_size), 381 | ) 382 | })?; 383 | let replication: c_short = self.replication.try_into().map_err(|_| { 384 | Error::new( 385 | ErrorKind::InvalidInput, 386 | format!("`replication` {} exceeds valid `c_short`", self.replication), 387 | ) 388 | })?; 389 | let blocksize: i32 = self.blocksize.try_into().map_err(|_| { 390 | Error::new( 391 | ErrorKind::InvalidInput, 392 | format!("`blocksize` {} exceeds valid `i32`", self.blocksize), 393 | ) 394 | })?; 395 | hdfsOpenFile( 396 | self.fs, 397 | p.as_ptr(), 398 | flags, 399 | buffer_size, 400 | replication, 401 | blocksize, 402 | ) 403 | }; 404 | 405 | if b.is_null() { 406 | return Err(Error::last_os_error()); 407 | } 408 | 409 | debug!("file {} with flags {} opened", path, flags); 410 | Ok(File::new(self.fs, b, path)) 411 | } 412 | 413 | #[cfg(feature = "async_file")] 414 | pub async fn async_open(&self, path: &str) -> Result<super::AsyncFile> { 415 | let opt = self.clone(); 416 | let path = path.to_string(); 417 | 418 | let file = blocking::unblock(move || opt.open(&path)).await?; 419 | Ok(super::AsyncFile::new(file, false)) 420 | } 421 | } 422 | -------------------------------------------------------------------------------- /src/client.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | use std::io; 3 | use std::mem::MaybeUninit; 4 | 5 | use errno::{set_errno, Errno}; 6 | use hdfs_sys::*; 7 | use log::debug; 8 | 9 | use crate::metadata::Metadata; 10 | use crate::{OpenOptions, Readdir}; 11 | 12 | /// Client holds the underlying connection to hdfs clusters. 13 | /// 14 | /// There is no need to terminate the connection manually; see the note below. 15 | /// 16 | /// # Note 17 | /// 18 | /// Hadoop has its own filesystem logic which may return the same filesystem instance across 19 | /// `hdfsConnect` calls. If we call `hdfsDisconnect`, all clients that hold this filesystem instance will 20 | /// hit `java.io.IOException: Filesystem closed` during I/O operations. 21 | /// 22 | /// So it's better for us not to call `hdfsDisconnect` manually. 23 | /// That is, we don't implement `Drop` to disconnect the connection.
24 | /// 25 | /// Reference: [IOException: Filesystem closed exception when running oozie workflow](https://stackoverflow.com/questions/23779186/ioexception-filesystem-closed-exception-when-running-oozie-workflow) 26 | /// 27 | /// # Examples 28 | /// 29 | /// ```no_run 30 | /// use hdrs::{Client, ClientBuilder}; 31 | /// 32 | /// let fs = ClientBuilder::new("default") 33 | /// .with_user("default") 34 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 35 | /// .connect(); 36 | /// ``` 37 | #[derive(Debug)] 38 | pub struct Client { 39 | fs: hdfsFS, 40 | } 41 | 42 | /// The builder for connecting to hdfs clusters. 43 | /// 44 | /// # Examples 45 | /// 46 | /// ```no_run 47 | /// use hdrs::{Client, ClientBuilder}; 48 | /// 49 | /// let fs = ClientBuilder::new("default") 50 | /// .with_user("default") 51 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_111") 52 | /// .connect(); 53 | /// ``` 54 | pub struct ClientBuilder { 55 | name_node: String, 56 | user: Option<String>, 57 | kerberos_ticket_cache_path: Option<String>, 58 | } 59 | 60 | impl ClientBuilder { 61 | /// Create a ClientBuilder with a name node 62 | /// 63 | /// Returns an [`hdrs::ClientBuilder`] 64 | /// 65 | /// # Notes 66 | /// 67 | /// The NameNode to use. 68 | /// 69 | /// If the given string is a profile name like 'default', the related 70 | /// NameNode configuration will be used (from the XML configuration 71 | /// files). 72 | /// 73 | /// If the string starts with a protocol type such as `file://` or 74 | /// `hdfs://`, this protocol type will be used. If not, the `hdfs://` 75 | /// protocol type will be used. You may specify a NameNode port in the 76 | /// usual way by passing a string of the format `hdfs://<host>:<port>`. 77 | /// 78 | /// # Examples 79 | /// 80 | /// ```no_run 81 | /// use hdrs::{Client, ClientBuilder}; 82 | /// 83 | /// let builder = ClientBuilder::new("default"); 84 | /// ``` 85 | pub fn new(name_node: &str) -> ClientBuilder { 86 | ClientBuilder { 87 | name_node: name_node.to_string(), 88 | user: None, 89 | kerberos_ticket_cache_path: None, 90 | } 91 | } 92 | 93 | /// Set the user for an existing ClientBuilder 94 | /// 95 | /// # Examples 96 | /// 97 | /// ```no_run 98 | /// use hdrs::{Client, ClientBuilder}; 99 | /// 100 | /// let client = ClientBuilder::new("default").with_user("default").connect(); 101 | /// ``` 102 | pub fn with_user(mut self, user: &str) -> ClientBuilder { 103 | self.user = Some(user.to_string()); 104 | self 105 | } 106 | 107 | /// Set the krb5 ticket cache path for an existing ClientBuilder 108 | /// 109 | /// # Examples 110 | /// 111 | /// ```no_run 112 | /// use hdrs::{Client, ClientBuilder}; 113 | /// 114 | /// let mut client = ClientBuilder::new("default") 115 | /// .with_kerberos_ticket_cache_path("/tmp/krb5_1001") 116 | /// .connect(); 117 | /// ``` 118 | pub fn with_kerberos_ticket_cache_path( 119 | mut self, 120 | kerberos_ticket_cache_path: &str, 121 | ) -> ClientBuilder { 122 | self.kerberos_ticket_cache_path = Some(kerberos_ticket_cache_path.to_string()); 123 | self 124 | } 125 | 126 | /// Connect an existing ClientBuilder to get an hdfs [`Client`] 127 | /// 128 | /// Returns the connected [`Client`] wrapped in an [`io::Result`]; an error is returned if the connection fails.
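///
/// For instance, a minimal sketch that connects to an explicit NameNode
/// address (the host and port below are placeholders):
///
/// ```no_run
/// use hdrs::ClientBuilder;
///
/// let client = ClientBuilder::new("hdfs://127.0.0.1:8020").connect();
/// ```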
129 | /// 130 | /// # Examples 131 | /// 132 | /// ```no_run 133 | /// use hdrs::{Client, ClientBuilder}; 134 | /// 135 | /// let mut client = ClientBuilder::new("default").connect(); 136 | /// ``` 137 | pub fn connect(self) -> io::Result<Client> { 138 | set_errno(Errno(0)); 139 | 140 | debug!("connect name node {}", &self.name_node); 141 | 142 | let fs = { 143 | let builder = unsafe { hdfsNewBuilder() }; 144 | 145 | let name_node = CString::new(self.name_node.as_bytes())?; 146 | let mut user = MaybeUninit::uninit(); 147 | let mut ticket_cache_path = MaybeUninit::uninit(); 148 | 149 | unsafe { hdfsBuilderSetNameNode(builder, name_node.as_ptr()) }; 150 | 151 | if let Some(v) = self.user { 152 | user.write(CString::new(v)?); 153 | unsafe { 154 | hdfsBuilderSetUserName(builder, user.assume_init_ref().as_ptr()); 155 | } 156 | } 157 | 158 | if let Some(v) = self.kerberos_ticket_cache_path { 159 | ticket_cache_path.write(CString::new(v)?); 160 | unsafe { 161 | hdfsBuilderSetKerbTicketCachePath( 162 | builder, 163 | ticket_cache_path.assume_init_ref().as_ptr(), 164 | ); 165 | } 166 | } 167 | 168 | unsafe { hdfsBuilderConnect(builder) } 169 | }; 170 | 171 | if fs.is_null() { 172 | return Err(io::Error::last_os_error()); 173 | } 174 | 175 | debug!("name node {} connected", self.name_node); 176 | Ok(Client::new(fs)) 177 | } 178 | } 179 | 180 | /// HDFS's client handle is thread safe. 181 | unsafe impl Send for Client {} 182 | unsafe impl Sync for Client {} 183 | 184 | impl Client { 185 | pub(crate) fn new(fs: hdfsFS) -> Self { 186 | Self { fs } 187 | } 188 | 189 | /// Open will create an [`OpenOptions`] builder for later IO operations. 190 | /// 191 | /// # Examples 192 | /// 193 | /// ```no_run 194 | /// use hdrs::{Client, ClientBuilder}; 195 | /// 196 | /// let fs = ClientBuilder::new("default") 197 | /// .with_user("default") 198 | /// .connect() 199 | /// .expect("client connect succeed"); 200 | /// let open_options = fs.open_file(); 201 | /// ``` 202 | pub fn open_file(&self) -> OpenOptions { 203 | OpenOptions::new(self.fs) 204 | } 205 | 206 | /// Delete a file. 207 | /// 208 | /// # Examples 209 | /// 210 | /// ```no_run 211 | /// use hdrs::{Client, ClientBuilder}; 212 | /// 213 | /// let fs = ClientBuilder::new("default") 214 | /// .with_user("default") 215 | /// .connect() 216 | /// .expect("client connect succeed"); 217 | /// let _ = fs.remove_file("/tmp/hello.txt"); 218 | /// ``` 219 | pub fn remove_file(&self, path: &str) -> io::Result<()> { 220 | debug!("remove file {}", path); 221 | 222 | let n = unsafe { 223 | let p = CString::new(path)?; 224 | hdfsDelete(self.fs, p.as_ptr(), false.into()) 225 | }; 226 | 227 | if n == -1 { 228 | return Err(io::Error::last_os_error()); 229 | } 230 | 231 | debug!("delete file {} finished", path); 232 | Ok(()) 233 | } 234 | 235 | /// Rename a file. 236 | /// 237 | /// **ATTENTION**: the destination directory must exist.
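///
/// A sketch that creates the destination directory before renaming (the
/// paths are placeholders):
///
/// ```no_run
/// use hdrs::{Client, ClientBuilder};
///
/// let fs = ClientBuilder::new("default")
///     .connect()
///     .expect("client connect succeed");
/// let _ = fs.create_dir("/tmp/target");
/// let _ = fs.rename_file("/tmp/hello.txt._COPY_", "/tmp/target/hello.txt");
/// ```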
238 | /// 239 | /// # Examples 240 | /// 241 | /// ```no_run 242 | /// use hdrs::{Client, ClientBuilder}; 243 | /// 244 | /// let fs = ClientBuilder::new("default") 245 | /// .with_user("default") 246 | /// .connect() 247 | /// .expect("client connect succeed"); 248 | /// let _ = fs.rename_file("/tmp/hello.txt._COPY_", "/tmp/hello.txt"); 249 | /// ``` 250 | pub fn rename_file(&self, old_path: &str, new_path: &str) -> io::Result<()> { 251 | debug!("rename file {} -> {}", old_path, new_path); 252 | 253 | let n = { 254 | let old_path = CString::new(old_path)?; 255 | let new_path = CString::new(new_path)?; 256 | unsafe { hdfsRename(self.fs, old_path.as_ptr(), new_path.as_ptr()) } 257 | }; 258 | 259 | if n == -1 { 260 | return Err(io::Error::last_os_error()); 261 | } 262 | 263 | debug!("rename file {} -> {} finished", old_path, new_path); 264 | Ok(()) 265 | } 266 | 267 | /// Delete a dir. 268 | /// 269 | /// # Examples 270 | /// 271 | /// ```no_run 272 | /// use hdrs::{Client, ClientBuilder}; 273 | /// 274 | /// let fs = ClientBuilder::new("default") 275 | /// .with_user("default") 276 | /// .connect() 277 | /// .expect("client connect succeed"); 278 | /// let _ = fs.remove_dir("/tmp/xxx"); 279 | /// ``` 280 | pub fn remove_dir(&self, path: &str) -> io::Result<()> { 281 | debug!("remove dir {}", path); 282 | 283 | let n = unsafe { 284 | let p = CString::new(path)?; 285 | hdfsDelete(self.fs, p.as_ptr(), false.into()) 286 | }; 287 | 288 | if n == -1 { 289 | return Err(io::Error::last_os_error()); 290 | } 291 | 292 | debug!("delete dir {} finished", path); 293 | Ok(()) 294 | } 295 | 296 | /// Delete a dir recursively. 297 | /// 298 | /// # Examples 299 | /// 300 | /// ```no_run 301 | /// use hdrs::{Client, ClientBuilder}; 302 | /// 303 | /// let fs = ClientBuilder::new("default") 304 | /// .with_user("default") 305 | /// .connect() 306 | /// .expect("client connect succeed"); 307 | /// let _ = fs.remove_dir_all("/tmp/xxx/"); 308 | /// ``` 309 | pub fn remove_dir_all(&self, path: &str) -> io::Result<()> { 310 | debug!("remove dir all {}", path); 311 | 312 | let n = unsafe { 313 | let p = CString::new(path)?; 314 | hdfsDelete(self.fs, p.as_ptr(), true.into()) 315 | }; 316 | 317 | if n == -1 { 318 | return Err(io::Error::last_os_error()); 319 | } 320 | 321 | debug!("delete dir all {} finished", path); 322 | Ok(()) 323 | } 324 | 325 | /// Stat a path to get file info. 
326 | /// 327 | /// # Examples 328 | /// 329 | /// ## Stat a path to get file info 330 | /// 331 | /// ```no_run 332 | /// use hdrs::{Client, ClientBuilder}; 333 | /// 334 | /// let fs = ClientBuilder::new("default") 335 | /// .with_user("default") 336 | /// .connect() 337 | /// .expect("client connect succeed"); 338 | /// let fi = fs.metadata("/tmp/hello.txt"); 339 | /// ``` 340 | /// 341 | /// ## Stat a non-existent path 342 | /// 343 | /// ```no_run 344 | /// use std::io; 345 | /// 346 | /// use hdrs::{Client, ClientBuilder}; 347 | /// 348 | /// let fs = ClientBuilder::new("default") 349 | /// .with_user("default") 350 | /// .connect() 351 | /// .expect("client connect succeed"); 352 | /// let fi = fs.metadata("/tmp/not-exist.txt"); 353 | /// assert!(fi.is_err()); 354 | /// assert_eq!(fi.unwrap_err().kind(), io::ErrorKind::NotFound) 355 | /// ``` 356 | pub fn metadata(&self, path: &str) -> io::Result<Metadata> { 357 | set_errno(Errno(0)); 358 | 359 | let hfi = unsafe { 360 | let p = CString::new(path)?; 361 | hdfsGetPathInfo(self.fs, p.as_ptr()) 362 | }; 363 | 364 | if hfi.is_null() { 365 | return Err(io::Error::last_os_error()); 366 | } 367 | 368 | // Safety: hfi must be valid 369 | let fi = unsafe { Metadata::from(*hfi) }; 370 | 371 | // Make sure hfi has been freed. 372 | unsafe { hdfsFreeFileInfo(hfi, 1) }; 373 | 374 | Ok(fi) 375 | } 376 | 377 | /// read_dir will read file entries from a directory. 378 | /// 379 | /// # Examples 380 | /// 381 | /// ```no_run 382 | /// use hdrs::{Client, ClientBuilder}; 383 | /// 384 | /// let fs = ClientBuilder::new("default") 385 | /// .with_user("default") 386 | /// .connect() 387 | /// .expect("client connect succeed"); 388 | /// let fis = fs.read_dir("/tmp/hello/"); 389 | /// ``` 390 | pub fn read_dir(&self, path: &str) -> io::Result<Readdir> { 391 | set_errno(Errno(0)); 392 | 393 | let mut entries = 0; 394 | let hfis = unsafe { 395 | let p = CString::new(path)?; 396 | hdfsListDirectory(self.fs, p.as_ptr(), &mut entries) 397 | }; 398 | 399 | // hfis will be NULL on error or for an empty directory. 400 | // We check last_os_error's code to tell the two apart: 401 | // - If `raw_os_error` is `None`, no error is set; return an empty vec directly. 402 | // - If errno == 0, there is no error; return an empty vec directly. 403 | // - If errno != 0, return the last os error. 404 | if hfis.is_null() { 405 | let e = io::Error::last_os_error(); 406 | 407 | return match e.raw_os_error() { 408 | None => Ok(Vec::new().into()), 409 | Some(0) => Ok(Vec::new().into()), 410 | Some(_) => Err(e), 411 | }; 412 | } 413 | 414 | let mut fis = Vec::with_capacity(entries as usize); 415 | 416 | for i in 0..entries { 417 | let m = unsafe { Metadata::from(*hfis.offset(i as isize)) }; 418 | 419 | fis.push(m) 420 | } 421 | 422 | // Make sure hfis has been freed. 423 | unsafe { hdfsFreeFileInfo(hfis, entries) }; 424 | 425 | Ok(fis.into()) 426 | } 427 | 428 | /// create_dir creates a dir and all of its parent directories. 429 | /// 430 | /// The behavior is similar to `mkdir -p /path/to/dir`.
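///
/// For example, a sketch that creates a nested path in one call, including
/// any missing parents (the path is a placeholder):
///
/// ```no_run
/// use hdrs::{Client, ClientBuilder};
///
/// let fs = ClientBuilder::new("default")
///     .connect()
///     .expect("client connect succeed");
/// let _ = fs.create_dir("/tmp/a/b/c");
/// ```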
431 | /// 432 | /// # Examples 433 | /// 434 | /// ```no_run 435 | /// use hdrs::{Client, ClientBuilder}; 436 | /// 437 | /// let fs = ClientBuilder::new("default") 438 | /// .with_user("default") 439 | /// .connect() 440 | /// .expect("client connect succeed"); 441 | /// let _ = fs.create_dir("/tmp"); 442 | /// ``` 443 | pub fn create_dir(&self, path: &str) -> io::Result<()> { 444 | let n = unsafe { 445 | let p = CString::new(path)?; 446 | hdfsCreateDirectory(self.fs, p.as_ptr()) 447 | }; 448 | 449 | if n == -1 { 450 | return Err(io::Error::last_os_error()); 451 | } 452 | 453 | Ok(()) 454 | } 455 | } 456 | 457 | #[cfg(test)] 458 | mod tests { 459 | use std::io; 460 | 461 | use log::debug; 462 | 463 | use crate::client::ClientBuilder; 464 | 465 | #[test] 466 | fn test_client_connect() { 467 | let _ = env_logger::try_init(); 468 | 469 | let fs = ClientBuilder::new("default") 470 | .connect() 471 | .expect("init success"); 472 | assert!(!fs.fs.is_null()) 473 | } 474 | 475 | #[test] 476 | fn test_client_open() { 477 | let _ = env_logger::try_init(); 478 | 479 | let fs = ClientBuilder::new("default") 480 | .connect() 481 | .expect("init success"); 482 | 483 | let path = uuid::Uuid::new_v4().to_string(); 484 | 485 | let _ = fs.open_file().read(true).open(&format!("/tmp/{path}")); 486 | } 487 | 488 | #[test] 489 | fn test_client_stat() { 490 | let _ = env_logger::try_init(); 491 | 492 | let fs = ClientBuilder::new("default") 493 | .connect() 494 | .expect("init success"); 495 | debug!("Client: {:?}", fs); 496 | 497 | let path = uuid::Uuid::new_v4().to_string(); 498 | 499 | let f = fs.metadata(&format!("/tmp/{path}")); 500 | assert!(f.is_err()); 501 | assert_eq!(f.unwrap_err().kind(), io::ErrorKind::NotFound); 502 | } 503 | 504 | #[test] 505 | fn test_client_readdir() { 506 | let _ = env_logger::try_init(); 507 | 508 | let fs = ClientBuilder::new("default") 509 | .connect() 510 | .expect("init success"); 511 | debug!("Client: {:?}", fs); 512 | 513 | let f = fs.read_dir("/tmp").expect("open file success"); 514 | debug!("Metadata: {:?}", f); 515 | assert!(f.len() > 0) 516 | } 517 | 518 | #[test] 519 | fn test_client_mkdir() { 520 | let _ = env_logger::try_init(); 521 | 522 | let fs = ClientBuilder::new("default") 523 | .connect() 524 | .expect("init success"); 525 | debug!("Client: {:?}", fs); 526 | 527 | fs.create_dir("/tmp") 528 | .expect("mkdir on exist dir should succeed"); 529 | } 530 | } 531 | --------------------------------------------------------------------------------