├── .gitignore ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── dev ├── _test.sh ├── build-set-env.sh └── docker │ ├── hdfs-native.dockerfile │ └── libhdfs3.dockerfile ├── header ├── libhdfs3-hdfs-client.xml └── src ├── dfs.rs ├── err.rs ├── hdfs_store.rs ├── lib.rs ├── raw.rs └── util.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | .idea 4 | .DS_Store 5 | .docker 6 | .vscode 7 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "hdfs-native" 20 | version = "0.1.0" 21 | edition = "2018" 22 | 23 | [lib] 24 | name = "hdfs_native" 25 | path = "src/lib.rs" 26 | 27 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 28 | 29 | [dependencies] 30 | libc = "0.2" 31 | log = "0.4" 32 | url = "2" 33 | thiserror = "1" 34 | 35 | async-trait = "0.1.41" 36 | chrono = "0.4" 37 | datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "master" } 38 | futures = "0.3" 39 | tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } 40 | tokio-stream = "0.1" 41 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # DataFusion-hdfs-native 21 | 22 | Connecting DataFusion to HDFS through the native HDFS client (libhdfs3). 23 | 24 | ## Setup libhdfs3 25 | 26 | 1. Install libhdfs3 27 | 28 | You can either install it via [Conda](https://docs.conda.io/en/latest/) 29 | 30 | ```shell 31 | conda install -c conda-forge libhdfs3 32 | ``` 33 | 34 | or build it from source 35 | 36 | ```shell 37 | 38 | # A specific revision that compiles on macOS and works with HDFS 2.6.x 39 | git clone https://github.com/ClickHouse-Extras/libhdfs3.git 40 | cd libhdfs3 41 | git checkout 24b058c356794ef6cc2d31323dc9adf0386652ff 42 | 43 | # then build it 44 | mkdir build && cd build 45 | ../bootstrap --prefix=/usr/local 46 | make 47 | make install 48 | ``` 49 | 50 | 51 | ## Configuration 52 | 53 | ```shell 54 | # client config to use: set the LIBHDFS3_CONF env variable, or place hdfs-client.xml in the working directory 55 | export LIBHDFS3_CONF=/path/to/libhdfs3-hdfs-client.xml 56 | ``` 57 | -------------------------------------------------------------------------------- /dev/_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # TODO enable debug with docker 21 | 22 | set -e 23 | 24 | . ./dev/build-set-env.sh 25 | 26 | # Use --progress=plain for detailed, non-scrolling docker output 27 | 28 | docker build -t hdfs-native:$HN_VERSION -f dev/docker/hdfs-native.dockerfile . 29 | -------------------------------------------------------------------------------- /dev/build-set-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
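# The awk invocation below extracts the crate version from Cargo.toml: splitting fields on spaces, '=' and double quotes turns the line `version = "0.1.0"` into HN_VERSION=0.1.0, which _test.sh then uses as the docker image tag.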
19 | 20 | export HN_VERSION=$(awk -F'[ ="]+' '$1 == "version" { print $2 }' Cargo.toml) 21 | -------------------------------------------------------------------------------- /dev/docker/hdfs-native.dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # Turn .dockerignore to .dockerallow by excluding everything and explicitly 19 | # allowing specific files and directories. This enables us to quickly add 20 | # dependency files to the docker content without scanning the whole directory. 21 | # This setup requires to all of our docker containers have arrow's source 22 | # as a mounted directory. 23 | 24 | #ARG RELEASE_FLAG=--release 25 | FROM yijieshen/hdfs26:0.2.0 AS base 26 | WORKDIR /tmp/hdfs-jni 27 | 28 | FROM base as planner 29 | RUN mkdir /tmp/hdfs-jni/src 30 | ADD Cargo.toml . 31 | COPY src ./src/ 32 | RUN cargo chef prepare --recipe-path recipe.json 33 | 34 | FROM base as cacher 35 | COPY --from=planner /tmp/hdfs-jni/recipe.json recipe.json 36 | RUN cargo chef cook $RELEASE_FLAG --recipe-path recipe.json 37 | 38 | FROM base as builder 39 | RUN mkdir /tmp/hdfs-jni/src 40 | ADD Cargo.toml . 41 | ADD build.rs . 42 | COPY src ./src/ 43 | COPY --from=cacher /tmp/hdfs-jni/target target 44 | 45 | #ARG RELEASE_FLAG=--release 46 | 47 | ENV LD_LIBRARY_PATH /usr/local/hadoop/lib/native:/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server 48 | ENV LIBRARY_PATH /usr/local/hadoop/lib/native:/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server 49 | 50 | ENV RUST_LOG=info 51 | ENV RUST_BACKTRACE=full 52 | 53 | # force build.rs to run to generate configure_me code. 54 | ENV FORCE_REBUILD='true' 55 | RUN export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob) && RUST_LOG=info cargo test -vv 56 | -------------------------------------------------------------------------------- /dev/docker/libhdfs3.dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # TODO adapt https://github.com/wesm/arrow-io-test to install libhdfs3 19 | -------------------------------------------------------------------------------- /header: -------------------------------------------------------------------------------- 1 | Licensed to the Apache Software Foundation (ASF) under one 2 | or more contributor license agreements. See the NOTICE file 3 | distributed with this work for additional information 4 | regarding copyright ownership. The ASF licenses this file 5 | to you under the Apache License, Version 2.0 (the 6 | "License"); you may not use this file except in compliance 7 | with the License. You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | -------------------------------------------------------------------------------- /libhdfs3-hdfs-client.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 53 | 54 | 55 | 56 | hadoop.security.authentication 57 | simple 58 | 59 | the RPC authentication method, valid values include "simple" or "kerberos". default is "simple" 60 | 61 | 62 | 63 | 64 | rpc.client.timeout 65 | 3600000 66 | 67 | timeout interval of a RPC invocation in millisecond. default is 3600000. 68 | 69 | 70 | 71 | 72 | rpc.client.connect.tcpnodelay 73 | true 74 | 75 | whether set socket TCP_NODELAY to true when connect to RPC server. default is true. 76 | 77 | 78 | 79 | 80 | rpc.client.max.idle 81 | 10000 82 | 83 | the max idle time of a RPC connection in millisecond. default is 10000. 84 | 85 | 86 | 87 | 88 | rpc.client.ping.interval 89 | 10000 90 | 91 | the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000. 92 | 93 | 94 | 95 | 96 | rpc.client.connect.timeout 97 | 600000 98 | 99 | the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000. 100 | 101 | 102 | 103 | 104 | rpc.client.connect.retry 105 | 10 106 | 107 | the max retry times if the RPC client fail to setup the connection to server. default is 10. 108 | 109 | 110 | 111 | 112 | rpc.client.read.timeout 113 | 3600000 114 | 115 | the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000. 116 | 117 | 118 | 119 | 120 | rpc.client.write.timeout 121 | 3600000 122 | 123 | the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000. 124 | 125 | 126 | 127 | 128 | rpc.client.socket.linger.timeout 129 | -1 130 | 131 | set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1. 
132 | 133 | 134 | 135 | 136 | 137 | dfs.client.read.shortcircuit 138 | false 139 | 140 | whether reading block file bypass datanode if the block and the client are 141 | on the same node. default is true. 142 | 143 | 144 | 145 | 146 | dfs.default.replica 147 | 1 148 | 149 | the default number of replica. default is 3. 150 | 151 | 152 | 153 | 154 | dfs.prefetchsize 155 | 10 156 | 157 | the default number of blocks which information will be prefetched. default is 10. 158 | 159 | 160 | 161 | 162 | dfs.client.failover.max.attempts 163 | 15 164 | 165 | if multiply namenodes are configured, it is the max retry times when the dfs client try to issue a RPC call. default is 15. 166 | 167 | 168 | 169 | 170 | dfs.default.blocksize 171 | 67108864 172 | 173 | default block size. default is 67108864. 174 | 175 | 176 | 177 | 178 | dfs.client.log.severity 179 | INFO 180 | 181 | the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO. 182 | 183 | 184 | 185 | 186 | 187 | input.connect.timeout 188 | 600000 189 | 190 | the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000. 191 | 192 | 193 | 194 | 195 | input.read.timeout 196 | 3600000 197 | 198 | the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000. 199 | 200 | 201 | 202 | 203 | input.write.timeout 204 | 3600000 205 | 206 | the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000. 207 | 208 | 209 | 210 | 211 | input.localread.default.buffersize 212 | 1048576 213 | 214 | number of bytes of the buffer which is used to hold the data from block file and verify checksum. 215 | it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576. 216 | 217 | 218 | 219 | 220 | input.localread.blockinfo.cachesize 221 | 1000 222 | 223 | the size of block file path information cache. default is 1000. 224 | 225 | 226 | 227 | 228 | input.read.getblockinfo.retry 229 | 3 230 | 231 | the max retry times when the client fail to get block information from namenode. default is 3. 232 | 233 | 234 | 235 | 236 | 237 | output.replace-datanode-on-failure 238 | false 239 | 240 | whether the client add new datanode into pipeline if the number of nodes in 241 | pipeline is less the specified number of replicas. default is true. 242 | 243 | 244 | 245 | 246 | output.default.chunksize 247 | 512 248 | 249 | the number of bytes of a chunk in pipeline. default is 512. 250 | 251 | 252 | 253 | 254 | output.default.packetsize 255 | 65536 256 | 257 | the number of bytes of a packet in pipeline. default is 65536. 258 | 259 | 260 | 261 | 262 | output.default.write.retry 263 | 10 264 | 265 | the max retry times when the client fail to setup the pipeline. default is 10. 266 | 267 | 268 | 269 | 270 | output.connect.timeout 271 | 600000 272 | 273 | the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000. 274 | 275 | 276 | 277 | 278 | output.read.timeout 279 | 3600000 280 | 281 | the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000. 282 | 283 | 284 | 285 | 286 | output.write.timeout 287 | 3600000 288 | 289 | the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000. 
290 | 291 | 292 | 293 | 294 | output.packetpool.size 295 | 1024 296 | 297 | the max number of packets in a file's packet pool. default is 1024. 298 | 299 | 300 | 301 | 302 | output.close.timeout 303 | 900000 304 | 305 | the timeout interval in millisecond when close an output stream. default is 900000. 306 | 307 | 308 | 309 | -------------------------------------------------------------------------------- /src/dfs.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::marker::PhantomData; 19 | use std::mem; 20 | use std::slice; 21 | use std::string::String; 22 | use std::sync::Arc; 23 | 24 | use libc::{c_char, c_int, c_short, c_void, time_t}; 25 | 26 | use crate::err::HdfsErr; 27 | use crate::raw::*; 28 | use crate::{b2i, from_raw, to_raw}; 29 | use std::cmp::min; 30 | use std::fmt::{Debug, Formatter}; 31 | 32 | const O_RDONLY: c_int = 0; 33 | const O_WRONLY: c_int = 1; 34 | const O_APPEND: c_int = 1024; 35 | 36 | /// Options for zero-copy read 37 | pub struct RzOptions { 38 | ptr: *const hadoopRzOptions, 39 | } 40 | 41 | impl Drop for RzOptions { 42 | fn drop(&mut self) { 43 | unsafe { hadoopRzOptionsFree(self.ptr) } 44 | } 45 | } 46 | 47 | impl Default for RzOptions { 48 | fn default() -> Self { 49 | RzOptions::new() 50 | } 51 | } 52 | 53 | impl RzOptions { 54 | pub fn new() -> RzOptions { 55 | RzOptions { 56 | ptr: unsafe { hadoopRzOptionsAlloc() }, 57 | } 58 | } 59 | 60 | pub fn skip_checksum(&self, skip: bool) -> Result { 61 | let res = unsafe { hadoopRzOptionsSetSkipChecksum(self.ptr, b2i!(skip)) }; 62 | 63 | if res == 0 { 64 | Ok(true) 65 | } else { 66 | Err(HdfsErr::Unknown) 67 | } 68 | } 69 | 70 | pub fn set_bytebuffer_pool(&self, class_name: &str) -> Result { 71 | let res = unsafe { hadoopRzOptionsSetByteBufferPool(self.ptr, to_raw!(class_name)) }; 72 | 73 | if res == 0 { 74 | Ok(true) 75 | } else { 76 | Err(HdfsErr::Unknown) 77 | } 78 | } 79 | } 80 | 81 | /// A buffer returned from zero-copy read. 82 | /// This buffer will be automatically freed when its lifetime is finished. 83 | pub struct RzBuffer<'a> { 84 | file: &'a HdfsFile<'a>, 85 | ptr: *const hadoopRzBuffer, 86 | } 87 | 88 | impl<'a> Drop for RzBuffer<'a> { 89 | fn drop(&mut self) { 90 | unsafe { hadoopRzBufferFree(self.file.file, self.ptr) } 91 | } 92 | } 93 | 94 | impl<'a> RzBuffer<'a> { 95 | /// Get the length of a raw buffer returned from zero-copy read. 96 | #[allow(clippy::len_without_is_empty)] 97 | pub fn len(&self) -> i32 { 98 | (unsafe { hadoopRzBufferLength(self.ptr) }) as i32 99 | } 100 | 101 | /// Get a pointer to the raw buffer returned from zero-copy read. 
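/// The pointer is only valid while this `RzBuffer` is alive; `Err(HdfsErr::Unknown)` is returned when the underlying `hadoopRzBufferGet` call yields a null pointer.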
102 | pub fn as_ptr(&self) -> Result<*const u8, HdfsErr> { 103 | let ptr = unsafe { hadoopRzBufferGet(self.ptr) }; 104 | 105 | if !ptr.is_null() { 106 | Ok(ptr as *const u8) 107 | } else { 108 | Err(HdfsErr::Unknown) 109 | } 110 | } 111 | 112 | /// Get a Slice transformed from a raw buffer 113 | pub fn as_slice(&'a self) -> Result<&[u8], HdfsErr> { 114 | let ptr = unsafe { hadoopRzBufferGet(self.ptr) as *const u8 }; 115 | 116 | let len = unsafe { hadoopRzBufferLength(self.ptr) as usize }; 117 | 118 | if !ptr.is_null() { 119 | Ok(unsafe { mem::transmute(slice::from_raw_parts(ptr, len as usize)) }) 120 | } else { 121 | Err(HdfsErr::Unknown) 122 | } 123 | } 124 | } 125 | 126 | /// Includes hostnames where a particular block of a file is stored. 127 | pub struct BlockHosts { 128 | ptr: *const *const *const c_char, 129 | } 130 | 131 | impl Drop for BlockHosts { 132 | fn drop(&mut self) { 133 | unsafe { hdfsFreeHosts(self.ptr) }; 134 | } 135 | } 136 | 137 | struct HdfsFileInfoPtr { 138 | pub ptr: *const hdfsFileInfo, 139 | pub len: i32, 140 | } 141 | 142 | impl<'a> Drop for HdfsFileInfoPtr { 143 | fn drop(&mut self) { 144 | unsafe { hdfsFreeFileInfo(self.ptr, self.len) }; 145 | } 146 | } 147 | 148 | impl HdfsFileInfoPtr { 149 | fn new(ptr: *const hdfsFileInfo) -> HdfsFileInfoPtr { 150 | HdfsFileInfoPtr { ptr, len: 1 } 151 | } 152 | 153 | pub fn new_array(ptr: *const hdfsFileInfo, len: i32) -> HdfsFileInfoPtr { 154 | HdfsFileInfoPtr { ptr, len } 155 | } 156 | } 157 | 158 | /// Interface that represents the client side information for a file or directory. 159 | pub struct FileStatus<'fs> { 160 | raw: Arc, 161 | idx: u32, 162 | _marker: PhantomData<&'fs HdfsFs>, 163 | } 164 | 165 | impl<'fs> FileStatus<'fs> { 166 | #[inline] 167 | /// create FileStatus from *const hdfsFileInfo 168 | fn new(ptr: *const hdfsFileInfo) -> FileStatus<'fs> { 169 | FileStatus { 170 | raw: Arc::new(HdfsFileInfoPtr::new(ptr)), 171 | idx: 0, 172 | _marker: PhantomData, 173 | } 174 | } 175 | 176 | /// create FileStatus from *const hdfsFileInfo which points 177 | /// to dynamically allocated array. 178 | #[inline] 179 | fn from_array(raw: Arc, idx: u32) -> FileStatus<'fs> { 180 | FileStatus { 181 | raw, 182 | idx, 183 | _marker: PhantomData, 184 | } 185 | } 186 | 187 | #[inline] 188 | fn ptr(&self) -> *const hdfsFileInfo { 189 | unsafe { self.raw.ptr.offset(self.idx as isize) } 190 | } 191 | 192 | /// Get the name of the file 193 | #[inline] 194 | pub fn name(&self) -> &'fs str { 195 | from_raw!((*self.ptr()).mName) 196 | } 197 | 198 | /// Is this a file? 199 | #[inline] 200 | pub fn is_file(&self) -> bool { 201 | match unsafe { &*self.ptr() }.mKind { 202 | tObjectKind::kObjectKindFile => true, 203 | tObjectKind::kObjectKindDirectory => false, 204 | } 205 | } 206 | 207 | /// Is this a directory? 208 | #[inline] 209 | pub fn is_directory(&self) -> bool { 210 | match unsafe { &*self.ptr() }.mKind { 211 | tObjectKind::kObjectKindFile => false, 212 | tObjectKind::kObjectKindDirectory => true, 213 | } 214 | } 215 | 216 | /// Get the owner of the file 217 | #[inline] 218 | pub fn owner(&self) -> &'fs str { 219 | from_raw!((*self.ptr()).mOwner) 220 | } 221 | 222 | /// Get the group associated with the file 223 | #[inline] 224 | pub fn group(&self) -> &'fs str { 225 | from_raw!((*self.ptr()).mGroup) 226 | } 227 | 228 | /// Get the permissions associated with the file 229 | #[inline] 230 | pub fn permission(&self) -> i16 { 231 | unsafe { &*self.ptr() }.mPermissions as i16 232 | } 233 | 234 | /// Get the length of this file, in bytes. 
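/// The value comes straight from the `mSize` field of the underlying `hdfsFileInfo` entry.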
235 | #[inline] 236 | #[allow(clippy::len_without_is_empty)] 237 | pub fn len(&self) -> usize { 238 | unsafe { &*self.ptr() }.mSize as usize 239 | } 240 | 241 | /// Get the block size of the file. 242 | #[inline] 243 | pub fn block_size(&self) -> usize { 244 | unsafe { &*self.ptr() }.mBlockSize as usize 245 | } 246 | 247 | /// Get the replication factor of a file. 248 | #[inline] 249 | pub fn replica_count(&self) -> i16 { 250 | unsafe { &*self.ptr() }.mReplication as i16 251 | } 252 | 253 | /// Get the last modification time for the file in seconds 254 | #[inline] 255 | pub fn last_modified(&self) -> time_t { 256 | unsafe { &*self.ptr() }.mLastMod 257 | } 258 | 259 | /// Get the last access time for the file in seconds 260 | #[inline] 261 | pub fn last_accced(&self) -> time_t { 262 | unsafe { &*self.ptr() }.mLastAccess 263 | } 264 | } 265 | 266 | /// Hdfs Filesystem 267 | /// 268 | /// It is basically thread safe because the native API for hdfsFs is thread-safe. 269 | #[derive(Clone)] 270 | pub struct HdfsFs { 271 | pub url: String, 272 | raw: *const hdfsFS, 273 | } 274 | 275 | unsafe impl Send for HdfsFs {} 276 | 277 | unsafe impl Sync for HdfsFs {} 278 | 279 | impl Debug for HdfsFs { 280 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 281 | f.debug_struct("Hdfs").field("url", &self.url).finish() 282 | } 283 | } 284 | 285 | impl HdfsFs { 286 | /// create HdfsFs instance. Please use HdfsFsCache rather than using this API directly. 287 | #[inline] 288 | pub(crate) fn new(url: String, raw: *const hdfsFS) -> HdfsFs { 289 | HdfsFs { url, raw } 290 | } 291 | 292 | /// Get HDFS namenode url 293 | #[inline] 294 | pub fn url(&self) -> &str { 295 | &self.url 296 | } 297 | 298 | /// Get a raw pointer of JNI API's HdfsFs 299 | #[inline] 300 | pub fn raw(&self) -> *const hdfsFS { 301 | self.raw 302 | } 303 | 304 | /// Open a file for append 305 | pub fn append(&self, path: &str) -> Result, HdfsErr> { 306 | if !self.exist(path) { 307 | return Err(HdfsErr::FileNotFound(path.to_owned())); 308 | } 309 | 310 | let file = unsafe { hdfsOpenFile(self.raw, to_raw!(path), O_APPEND, 0, 0, 0) }; 311 | 312 | if file.is_null() { 313 | Err(HdfsErr::Unknown) 314 | } else { 315 | Ok(HdfsFile { 316 | fs: self, 317 | path: path.to_owned(), 318 | file, 319 | }) 320 | } 321 | } 322 | 323 | /// set permission 324 | pub fn chmod(&self, path: &str, mode: i16) -> bool { 325 | (unsafe { hdfsChmod(self.raw, to_raw!(path), mode as c_short) }) == 0 326 | } 327 | 328 | pub fn chown(&self, path: &str, owner: &str, group: &str) -> bool { 329 | (unsafe { hdfsChown(self.raw, to_raw!(path), to_raw!(owner), to_raw!(group)) }) == 0 330 | } 331 | 332 | #[inline] 333 | pub fn create(&self, path: &str) -> Result, HdfsErr> { 334 | self.create_with_params(path, false, 0, 0, 0) 335 | } 336 | 337 | #[inline] 338 | pub fn create_with_overwrite( 339 | &self, 340 | path: &str, 341 | overwrite: bool, 342 | ) -> Result, HdfsErr> { 343 | self.create_with_params(path, overwrite, 0, 0, 0) 344 | } 345 | 346 | pub fn create_with_params( 347 | &self, 348 | path: &str, 349 | overwrite: bool, 350 | buf_size: i32, 351 | replica_num: i16, 352 | block_size: i32, 353 | ) -> Result, HdfsErr> { 354 | if !overwrite && self.exist(path) { 355 | return Err(HdfsErr::FileAlreadyExists(path.to_owned())); 356 | } 357 | 358 | let file = unsafe { 359 | hdfsOpenFile( 360 | self.raw, 361 | to_raw!(path), 362 | O_WRONLY, 363 | buf_size as c_int, 364 | replica_num as c_short, 365 | block_size as i32, 366 | ) 367 | }; 368 | 369 | if file.is_null() { 370 | 
Err(HdfsErr::Unknown) 371 | } else { 372 | Ok(HdfsFile { 373 | fs: self, 374 | path: path.to_owned(), 375 | file, 376 | }) 377 | } 378 | } 379 | 380 | /// Get the default blocksize. 381 | pub fn default_blocksize(&self) -> Result { 382 | let block_sz = unsafe { hdfsGetDefaultBlockSize(self.raw) }; 383 | 384 | if block_sz > 0 { 385 | Ok(block_sz as usize) 386 | } else { 387 | Err(HdfsErr::Unknown) 388 | } 389 | } 390 | 391 | /// Get the default blocksize at the filesystem indicated by a given path. 392 | pub fn block_size(&self, path: &str) -> Result { 393 | let block_sz = unsafe { hdfsGetDefaultBlockSizeAtPath(self.raw, to_raw!(path)) }; 394 | 395 | if block_sz > 0 { 396 | Ok(block_sz as usize) 397 | } else { 398 | Err(HdfsErr::Unknown) 399 | } 400 | } 401 | 402 | /// Return the raw capacity of the filesystem. 403 | pub fn capacity(&self) -> Result { 404 | let block_sz = unsafe { hdfsGetCapacity(self.raw) }; 405 | 406 | if block_sz > 0 { 407 | Ok(block_sz as usize) 408 | } else { 409 | Err(HdfsErr::Unknown) 410 | } 411 | } 412 | 413 | /// Delete file. 414 | pub fn delete(&self, path: &str, recursive: bool) -> Result { 415 | let res = unsafe { hdfsDelete(self.raw, to_raw!(path), recursive as c_int) }; 416 | 417 | if res == 0 { 418 | Ok(true) 419 | } else { 420 | Err(HdfsErr::Unknown) 421 | } 422 | } 423 | 424 | /// Checks if a given path exsits on the filesystem 425 | pub fn exist(&self, path: &str) -> bool { 426 | unsafe { hdfsExists(self.raw, to_raw!(path)) == 0 } 427 | } 428 | 429 | /// Get hostnames where a particular block (determined by 430 | /// pos & blocksize) of a file is stored. The last element in the array 431 | /// is NULL. Due to replication, a single block could be present on 432 | /// multiple hosts. 433 | pub fn get_hosts( 434 | &self, 435 | path: &str, 436 | start: usize, 437 | length: usize, 438 | ) -> Result { 439 | let ptr = unsafe { hdfsGetHosts(self.raw, to_raw!(path), start as i64, length as i64) }; 440 | 441 | if !ptr.is_null() { 442 | Ok(BlockHosts { ptr }) 443 | } else { 444 | Err(HdfsErr::Unknown) 445 | } 446 | } 447 | 448 | /// create a directory 449 | pub fn mkdir(&self, path: &str) -> Result { 450 | if unsafe { hdfsCreateDirectory(self.raw, to_raw!(path)) } == 0 { 451 | Ok(true) 452 | } else { 453 | Err(HdfsErr::Unknown) 454 | } 455 | } 456 | 457 | /// open a file to read 458 | #[inline] 459 | pub fn open(&self, path: &str) -> Result, HdfsErr> { 460 | self.open_with_bufsize(path, 0) 461 | } 462 | 463 | /// open a file to read with a buffer size 464 | pub fn open_with_bufsize(&self, path: &str, buf_size: i32) -> Result, HdfsErr> { 465 | let file = 466 | unsafe { hdfsOpenFile(self.raw, to_raw!(path), O_RDONLY, buf_size as c_int, 0, 0) }; 467 | 468 | if file.is_null() { 469 | Err(HdfsErr::Unknown) 470 | } else { 471 | Ok(HdfsFile { 472 | fs: self, 473 | path: path.to_owned(), 474 | file, 475 | }) 476 | } 477 | } 478 | 479 | /// Set the replication of the specified file to the supplied value 480 | pub fn set_replication(&self, path: &str, num: i16) -> Result { 481 | let res = unsafe { hdfsSetReplication(self.raw, to_raw!(path), num as i16) }; 482 | 483 | if res == 0 { 484 | Ok(true) 485 | } else { 486 | Err(HdfsErr::Unknown) 487 | } 488 | } 489 | 490 | /// Rename file. 
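/// Returns `Err(HdfsErr::Unknown)` when the underlying `hdfsRename` call reports a non-zero status.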
491 | pub fn rename(&self, old_path: &str, new_path: &str) -> Result { 492 | let res = unsafe { hdfsRename(self.raw, to_raw!(old_path), to_raw!(new_path)) }; 493 | 494 | if res == 0 { 495 | Ok(true) 496 | } else { 497 | Err(HdfsErr::Unknown) 498 | } 499 | } 500 | 501 | /// Return the total raw size of all files in the filesystem. 502 | pub fn used(&self) -> Result { 503 | let block_sz = unsafe { hdfsGetUsed(self.raw) }; 504 | 505 | if block_sz > 0 { 506 | Ok(block_sz as usize) 507 | } else { 508 | Err(HdfsErr::Unknown) 509 | } 510 | } 511 | 512 | pub fn list_status(&self, path: &str) -> Result, HdfsErr> { 513 | let mut entry_num: c_int = 0; 514 | 515 | let ptr = unsafe { hdfsListDirectory(self.raw, to_raw!(path), &mut entry_num) }; 516 | 517 | if ptr.is_null() { 518 | return Err(HdfsErr::Unknown); 519 | } 520 | 521 | let shared_ptr = Arc::new(HdfsFileInfoPtr::new_array(ptr, entry_num)); 522 | 523 | let mut list = Vec::new(); 524 | for idx in 0..entry_num { 525 | list.push(FileStatus::from_array(shared_ptr.clone(), idx as u32)); 526 | } 527 | 528 | Ok(list) 529 | } 530 | 531 | pub fn get_file_status(&self, path: &str) -> Result { 532 | let ptr = unsafe { hdfsGetPathInfo(self.raw, to_raw!(path)) }; 533 | 534 | if ptr.is_null() { 535 | Err(HdfsErr::Unknown) 536 | } else { 537 | Ok(FileStatus::new(ptr)) 538 | } 539 | } 540 | } 541 | 542 | /// open hdfs file 543 | pub struct HdfsFile<'a> { 544 | fs: &'a HdfsFs, 545 | path: String, 546 | file: *const hdfsFile, 547 | } 548 | 549 | #[derive(Clone)] 550 | pub struct RawHdfsFileWrapper { 551 | pub path: String, 552 | pub file: *const hdfsFile, 553 | } 554 | 555 | impl<'a> From<&HdfsFile<'a>> for RawHdfsFileWrapper { 556 | fn from(file: &HdfsFile<'a>) -> Self { 557 | RawHdfsFileWrapper { 558 | path: file.path.clone(), 559 | file: file.file, 560 | } 561 | } 562 | } 563 | 564 | unsafe impl Send for RawHdfsFileWrapper {} 565 | 566 | unsafe impl Sync for RawHdfsFileWrapper {} 567 | 568 | impl<'a> HdfsFile<'a> { 569 | pub fn from_raw(rw: &RawHdfsFileWrapper, fs: &'a HdfsFs) -> HdfsFile<'a> { 570 | let path = rw.path.clone(); 571 | HdfsFile { 572 | fs, 573 | path, 574 | file: rw.file, 575 | } 576 | } 577 | 578 | pub fn available(&self) -> Result { 579 | if unsafe { hdfsAvailable(self.fs.raw, self.file) } == 0 { 580 | Ok(true) 581 | } else { 582 | Err(HdfsErr::Unknown) 583 | } 584 | } 585 | 586 | /// Close the opened file 587 | pub fn close(&self) -> Result { 588 | if unsafe { hdfsCloseFile(self.fs.raw, self.file) } == 0 { 589 | Ok(true) 590 | } else { 591 | Err(HdfsErr::Unknown) 592 | } 593 | } 594 | 595 | /// Flush the data. 596 | pub fn flush(&self) -> bool { 597 | (unsafe { hdfsFlush(self.fs.raw, self.file) }) == 0 598 | } 599 | 600 | /// Flush out the data in client's user buffer. After the return of this 601 | /// call, new readers will see the data. 602 | pub fn hflush(&self) -> bool { 603 | (unsafe { hdfsHFlush(self.fs.raw, self.file) }) == 0 604 | } 605 | 606 | /// Similar to posix fsync, Flush out the data in client's 607 | /// user buffer. all the way to the disk device (but the disk may have 608 | /// it in its cache). 609 | pub fn hsync(&self) -> bool { 610 | (unsafe { hdfsHSync(self.fs.raw, self.file) }) == 0 611 | } 612 | 613 | /// Determine if a file is open for read. 614 | pub fn is_readable(&self) -> bool { 615 | (unsafe { hdfsFileIsOpenForRead(self.file) }) == 1 616 | } 617 | 618 | /// Determine if a file is open for write. 
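/// Thin wrapper over `hdfsFileIsOpenForWrite`; handles obtained from `create` or `append` are expected to report true, while read-only handles from `open` report false.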
619 | pub fn is_writable(&self) -> bool { 620 | (unsafe { hdfsFileIsOpenForWrite(self.file) }) == 1 621 | } 622 | 623 | /// Return a file path 624 | pub fn path(&'a self) -> &'a str { 625 | &self.path 626 | } 627 | 628 | /// Get the current offset in the file, in bytes. 629 | pub fn pos(&self) -> Result { 630 | let pos = unsafe { hdfsTell(self.fs.raw, self.file) }; 631 | 632 | if pos > 0 { 633 | Ok(pos as u64) 634 | } else { 635 | Err(HdfsErr::Unknown) 636 | } 637 | } 638 | 639 | /// Read data from an open file. 640 | pub fn read(&self, buf: &mut [u8]) -> Result { 641 | let read_len = unsafe { 642 | hdfsRead( 643 | self.fs.raw, 644 | self.file, 645 | buf.as_ptr() as *mut c_void, 646 | buf.len() as tSize, 647 | ) 648 | }; 649 | 650 | if read_len > 0 { 651 | Ok(read_len as i32) 652 | } else { 653 | Err(HdfsErr::Unknown) 654 | } 655 | } 656 | 657 | /// Positional read of data from an open file. 658 | pub fn read_with_pos(&self, pos: i64, buf: &mut [u8]) -> Result { 659 | let read_len = unsafe { 660 | hdfsPread( 661 | self.fs.raw, 662 | self.file, 663 | pos as tOffset, 664 | buf.as_ptr() as *mut c_void, 665 | buf.len() as tSize, 666 | ) 667 | }; 668 | 669 | if read_len > 0 { 670 | Ok(read_len as i32) 671 | } else { 672 | Err(HdfsErr::Unknown) 673 | } 674 | } 675 | 676 | /// Read data from an open file. 677 | pub fn read_length(&self, buf: &mut [u8], length: usize) -> Result { 678 | let required_len = min(length, buf.len()); 679 | let read_len = unsafe { 680 | hdfsRead( 681 | self.fs.raw, 682 | self.file, 683 | buf.as_ptr() as *mut c_void, 684 | required_len as tSize, 685 | ) 686 | }; 687 | 688 | if read_len > 0 { 689 | Ok(read_len as i32) 690 | } else { 691 | Err(HdfsErr::Unknown) 692 | } 693 | } 694 | 695 | /// Positional read of data from an open file. 696 | pub fn read_with_pos_length( 697 | &self, 698 | pos: i64, 699 | buf: &mut [u8], 700 | length: usize, 701 | ) -> Result { 702 | let required_len = min(length, buf.len()); 703 | let read_len = unsafe { 704 | hdfsPread( 705 | self.fs.raw, 706 | self.file, 707 | pos as tOffset, 708 | buf.as_ptr() as *mut c_void, 709 | required_len as tSize, 710 | ) 711 | }; 712 | 713 | if read_len > 0 { 714 | Ok(read_len as i32) 715 | } else { 716 | Err(HdfsErr::Unknown) 717 | } 718 | } 719 | 720 | /// Perform a byte buffer read. If possible, this will be a zero-copy 721 | /// (mmap) read. 722 | pub fn read_zc(&'a self, opts: &RzOptions, max_len: i32) -> Result, HdfsErr> { 723 | let buf: *const hadoopRzBuffer = 724 | unsafe { hadoopReadZero(self.file, opts.ptr, max_len as i32) }; 725 | 726 | if !buf.is_null() { 727 | Ok(RzBuffer { 728 | file: self, 729 | ptr: buf, 730 | }) 731 | } else { 732 | Err(HdfsErr::Unknown) 733 | } 734 | } 735 | 736 | /// Seek to given offset in file. 737 | pub fn seek(&self, offset: u64) -> bool { 738 | (unsafe { hdfsSeek(self.fs.raw, self.file, offset as tOffset) }) == 0 739 | } 740 | 741 | /// Write data into an open file. 
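/// Returns the number of bytes accepted by the underlying `hdfsWrite` call, or `Err(HdfsErr::Unknown)` when the call fails.
///
/// # Example
///
/// A minimal sketch; the namenode URL and path are hypothetical, and a running HDFS reachable through libhdfs3 is required at run time:
///
/// ```no_run
/// use hdfs_native::HdfsRegistry;
///
/// let fs = HdfsRegistry::new().get("hdfs://localhost:9000/").unwrap();
/// let file = fs.create("/tmp/hello.txt").unwrap();
/// file.write(b"hello hdfs").unwrap();
/// file.close().unwrap();
/// ```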
742 | pub fn write(&self, buf: &[u8]) -> Result { 743 | let written_len = unsafe { 744 | hdfsWrite( 745 | self.fs.raw, 746 | self.file, 747 | buf.as_ptr() as *mut c_void, 748 | buf.len() as tSize, 749 | ) 750 | }; 751 | 752 | if written_len > 0 { 753 | Ok(written_len) 754 | } else { 755 | Err(HdfsErr::Unknown) 756 | } 757 | } 758 | } 759 | -------------------------------------------------------------------------------- /src/err.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use datafusion::error::DataFusionError; 19 | use std::io::ErrorKind; 20 | use thiserror::Error; 21 | 22 | /// Errors which can occur during accessing Hdfs cluster 23 | #[derive(Error, Debug)] 24 | pub enum HdfsErr { 25 | #[error("Unknown hdfs error")] 26 | Unknown, 27 | /// file path 28 | #[error("File not found `{0}`")] 29 | FileNotFound(String), 30 | /// file path 31 | #[error("File already exists `{0}`")] 32 | FileAlreadyExists(String), 33 | /// namenode address 34 | #[error("Cannot connect to NameNode `{0}`")] 35 | CannotConnectToNameNode(String), 36 | /// URL 37 | #[error("Invalid URL `{0}`")] 38 | InvalidUrl(String), 39 | } 40 | 41 | fn get_error_kind(e: &HdfsErr) -> ErrorKind { 42 | match e { 43 | HdfsErr::Unknown => ErrorKind::Other, 44 | HdfsErr::FileNotFound(_) => ErrorKind::NotFound, 45 | HdfsErr::FileAlreadyExists(_) => ErrorKind::AlreadyExists, 46 | HdfsErr::CannotConnectToNameNode(_) => ErrorKind::ConnectionRefused, 47 | HdfsErr::InvalidUrl(_) => ErrorKind::AddrNotAvailable, 48 | } 49 | } 50 | 51 | impl From for DataFusionError { 52 | fn from(e: HdfsErr) -> DataFusionError { 53 | let transformed_kind = get_error_kind(&e); 54 | DataFusionError::IoError(std::io::Error::new(transformed_kind, e)) 55 | } 56 | } 57 | 58 | impl From for std::io::Error { 59 | fn from(e: HdfsErr) -> std::io::Error { 60 | let transformed_kind = get_error_kind(&e); 61 | std::io::Error::new(transformed_kind, e) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/hdfs_store.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use futures::{AsyncRead, Future}; 3 | use std::sync::{Arc, Mutex}; 4 | 5 | use crate::{FileStatus, HdfsErr, HdfsFile, HdfsFs, HdfsRegistry, RawHdfsFileWrapper}; 6 | use chrono::{Local, TimeZone, Utc}; 7 | use datafusion::datasource::object_store::{ 8 | FileMeta, FileMetaStream, ListEntry, ListEntryStream, ObjectReader, ObjectStore, 9 | }; 10 | use datafusion::error::{DataFusionError, Result}; 11 | use std::collections::HashMap; 12 | use std::convert::TryFrom; 13 | use std::fmt::Debug; 14 | use 
std::io::{ErrorKind, Read}; 15 | use std::pin::Pin; 16 | use std::task::{Context, Poll}; 17 | use tokio::{ 18 | sync::mpsc::{channel, Receiver, Sender}, 19 | task, 20 | }; 21 | use tokio_stream::wrappers::ReceiverStream; 22 | use tokio_stream::StreamExt; 23 | 24 | #[derive(Debug)] 25 | pub struct HdfsStore { 26 | fs_registry: HdfsRegistry, 27 | } 28 | 29 | impl HdfsStore { 30 | #[allow(dead_code)] 31 | // We will finally move HdfsStore into its own crate when Hdfs-native is mature, 32 | // therefore ignore the warning here. 33 | pub fn new() -> Result { 34 | Ok(HdfsStore { 35 | fs_registry: HdfsRegistry::new(), 36 | }) 37 | } 38 | 39 | pub fn new_from(fs: Arc>>) -> Self { 40 | HdfsStore { 41 | fs_registry: HdfsRegistry::new_from(fs), 42 | } 43 | } 44 | 45 | pub fn get_fs(&self, prefix: &str) -> std::result::Result { 46 | self.fs_registry.get(prefix) 47 | } 48 | 49 | fn all_fs(&self) -> Arc>> { 50 | self.fs_registry.all_fs.clone() 51 | } 52 | } 53 | 54 | fn list_dir_sync( 55 | all_fs: Arc>>, 56 | prefix: &str, 57 | response_tx: Sender>, 58 | ) -> Result<()> { 59 | let store = HdfsStore::new_from(all_fs); 60 | let fs = store.get_fs(prefix)?; 61 | let all_status = fs.list_status(prefix)?; 62 | for status in &all_status { 63 | response_tx 64 | .blocking_send(Ok(ListEntry::from(status))) 65 | .map_err(|e| DataFusionError::Execution(e.to_string()))?; 66 | } 67 | Ok(()) 68 | } 69 | 70 | impl<'a> TryFrom<&FileStatus<'a>> for FileMeta { 71 | type Error = DataFusionError; 72 | 73 | fn try_from(status: &FileStatus) -> Result { 74 | let rs: ListEntry = status.into(); 75 | match rs { 76 | ListEntry::FileMeta(f) => Ok(f), 77 | ListEntry::Prefix(path) => { 78 | Err(std::io::Error::new(ErrorKind::Other, format!("{} is not a file", path)).into()) 79 | } 80 | } 81 | } 82 | } 83 | 84 | impl<'a> From<&FileStatus<'a>> for ListEntry { 85 | fn from(status: &FileStatus) -> Self { 86 | if status.is_directory() { 87 | ListEntry::Prefix(status.name().to_owned()) 88 | } else { 89 | let time = Local 90 | .timestamp(status.last_modified(), 0) 91 | .with_timezone(&Utc); 92 | ListEntry::FileMeta(FileMeta { 93 | path: status.name().to_owned(), 94 | last_modified: Some(time), 95 | size: status.len() as u64, 96 | }) 97 | } 98 | } 99 | } 100 | 101 | #[async_trait] 102 | impl ObjectStore for HdfsStore { 103 | async fn list_file(&self, prefix: &str) -> Result { 104 | let entry_stream = self.list_dir(prefix, None).await?; 105 | let result = entry_stream.map(|r| match r { 106 | Ok(entry) => match entry { 107 | ListEntry::FileMeta(fm) => Ok(fm), 108 | ListEntry::Prefix(path) => Err(DataFusionError::from(std::io::Error::new( 109 | ErrorKind::InvalidInput, 110 | format!("{} is not a file", path), 111 | ))), 112 | }, 113 | Err(e) => Err(e), 114 | }); 115 | 116 | Ok(Box::pin(result)) 117 | } 118 | 119 | async fn list_dir(&self, prefix: &str, _delimiter: Option) -> Result { 120 | let (response_tx, response_rx): (Sender>, Receiver>) = 121 | channel(2); 122 | let prefix = prefix.to_owned(); 123 | let all_fs = self.all_fs(); 124 | task::spawn_blocking(move || { 125 | if let Err(e) = list_dir_sync(all_fs, &prefix, response_tx) { 126 | println!("List status thread terminated due to error {:?}", e) 127 | } 128 | }); 129 | Ok(Box::pin(ReceiverStream::new(response_rx))) 130 | } 131 | 132 | fn file_reader(&self, file: FileMeta) -> Result> { 133 | let fs = self.all_fs(); 134 | let reader = HdfsFileReader::new(HdfsStore::new_from(fs), file); 135 | Ok(Arc::new(reader)) 136 | } 137 | } 138 | 139 | pub struct HdfsFileReader { 140 | store: 
HdfsStore, 141 | file: FileMeta, 142 | } 143 | 144 | struct HdfsAsyncRead { 145 | store: HdfsStore, 146 | file: RawHdfsFileWrapper, 147 | start: u64, 148 | length: usize, 149 | } 150 | 151 | impl AsyncRead for HdfsAsyncRead { 152 | fn poll_read( 153 | self: Pin<&mut Self>, 154 | cx: &mut Context<'_>, 155 | buf: &mut [u8], 156 | ) -> Poll> { 157 | let path = self.file.path.clone(); 158 | let all_fs = self.store.all_fs(); 159 | let file_wrapper = self.file.clone(); 160 | let start = self.start as i64; 161 | let length = self.length; 162 | let buf_len = buf.len(); 163 | 164 | let mut read_sync = task::spawn_blocking(move || { 165 | let store = HdfsStore::new_from(all_fs); 166 | let fs = store.get_fs(&*path); 167 | let mut vec = vec![0u8; buf_len]; 168 | match fs { 169 | Ok(fs) => { 170 | let file = HdfsFile::from_raw(&file_wrapper, &fs); 171 | file.read_with_pos_length(start as i64, &mut *vec, length) 172 | .map_err(std::io::Error::from) 173 | .map(|s| (vec, s as usize)) 174 | } 175 | Err(e) => Err(std::io::Error::from(e)), 176 | } 177 | }); 178 | 179 | match Pin::new(&mut read_sync).poll(cx) { 180 | Poll::Ready(r) => match r { 181 | Ok(vl_r) => match vl_r { 182 | Ok(vl) => match vl.0.as_slice().read(buf) { 183 | Ok(_) => Poll::Ready(Ok(vl.1)), 184 | Err(e) => Poll::Ready(Err(e)), 185 | }, 186 | Err(e) => Poll::Ready(Err(e)), 187 | }, 188 | Err(e) => Poll::Ready(Err(std::io::Error::from(e))), 189 | }, 190 | Poll::Pending => Poll::Pending, 191 | } 192 | } 193 | } 194 | 195 | impl HdfsFileReader { 196 | pub fn new(store: HdfsStore, file: FileMeta) -> Self { 197 | Self { store, file } 198 | } 199 | } 200 | 201 | #[async_trait] 202 | impl ObjectReader for HdfsFileReader { 203 | async fn chunk_reader(&self, start: u64, length: usize) -> Result> { 204 | let file = self.file.path.clone(); 205 | let fs = self.store.all_fs(); 206 | let x = task::spawn_blocking(move || { 207 | let store = HdfsStore::new_from(fs); 208 | let fs_result = store.get_fs(&*file).map_err(DataFusionError::from); 209 | match fs_result { 210 | Ok(fs) => { 211 | let file_result = fs.open(&*file).map_err(DataFusionError::from); 212 | match file_result { 213 | Ok(file) => { 214 | let x = (&file).into(); 215 | Ok(HdfsAsyncRead { 216 | store: HdfsStore::new_from(store.all_fs()), 217 | file: x, 218 | start, 219 | length, 220 | }) 221 | } 222 | Err(e) => Err(e), 223 | } 224 | } 225 | Err(e) => Err(e), 226 | } 227 | }) 228 | .await; 229 | match x { 230 | Ok(r) => Ok(Arc::new(r?)), 231 | Err(e) => Err(DataFusionError::Execution(format!( 232 | "Open hdfs file thread terminated due to error: {:?}", 233 | e 234 | ))), 235 | } 236 | } 237 | 238 | fn length(&self) -> u64 { 239 | self.file.size 240 | } 241 | } 242 | 243 | #[cfg(test)] 244 | mod tests { 245 | use crate::hdfs_store::HdfsStore; 246 | 247 | #[test] 248 | fn it_works() { 249 | let _hdfs_store = HdfsStore::new(); 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! A Rust wrapper over libhdfs3 19 | 20 | /// Rust APIs wrapping the libhdfs3 API, providing better semantics and abstractions 21 | pub mod dfs; 22 | pub mod err; 23 | mod hdfs_store; 24 | /// libhdfs3 raw binding APIs 25 | pub mod raw; 26 | pub mod util; 27 | 28 | pub use crate::dfs::*; 29 | pub use crate::err::HdfsErr; 30 | pub use crate::util::HdfsUtil; 31 | 32 | use crate::raw::{ 33 | hdfsBuilderConnect, hdfsBuilderSetNameNode, hdfsBuilderSetNameNodePort, hdfsFS, hdfsNewBuilder, 34 | }; 35 | use log::info; 36 | use std::collections::HashMap; 37 | use std::sync::{Arc, Mutex}; 38 | use url::Url; 39 | 40 | static LOCAL_FS_SCHEME: &str = "file"; 41 | 42 | /// HdfsRegistry which stores seen HdfsFs instances. 43 | #[derive(Debug)] 44 | pub struct HdfsRegistry { 45 | all_fs: Arc<Mutex<HashMap<String, HdfsFs>>>, 46 | } 47 | 48 | impl Default for HdfsRegistry { 49 | fn default() -> Self { 50 | HdfsRegistry::new() 51 | } 52 | } 53 | 54 | struct HostPort { 55 | host: String, 56 | port: u16, 57 | } 58 | 59 | enum NNScheme { 60 | Local, 61 | Remote(HostPort), 62 | } 63 | 64 | impl ToString for NNScheme { 65 | fn to_string(&self) -> String { 66 | match self { 67 | NNScheme::Local => "file:///".to_string(), 68 | NNScheme::Remote(hp) => format!("{}:{}", hp.host, hp.port), 69 | } 70 | } 71 | } 72 | 73 | impl HdfsRegistry { 74 | pub fn new() -> HdfsRegistry { 75 | HdfsRegistry { 76 | all_fs: Arc::new(Mutex::new(HashMap::new())), 77 | } 78 | } 79 | 80 | pub fn new_from(fs: Arc<Mutex<HashMap<String, HdfsFs>>>) -> HdfsRegistry { 81 | HdfsRegistry { all_fs: fs } 82 | } 83 | 84 | fn get_namenode(&self, path: &str) -> Result<NNScheme, HdfsErr> { 85 | match Url::parse(path) { 86 | Ok(url) => { 87 | if url.scheme() == LOCAL_FS_SCHEME { 88 | Ok(NNScheme::Local) 89 | } else if url.host().is_some() && url.port().is_some() { 90 | Ok(NNScheme::Remote(HostPort { 91 | host: format!("{}://{}", &url.scheme(), url.host().unwrap()), 92 | port: url.port().unwrap(), 93 | })) 94 | } else { 95 | Err(HdfsErr::InvalidUrl(path.to_string())) 96 | } 97 | } 98 | Err(_) => Err(HdfsErr::InvalidUrl(path.to_string())), 99 | } 100 | } 101 | 102 | pub fn get(&self, path: &str) -> Result<HdfsFs, HdfsErr> { 103 | let host_port = self.get_namenode(path)?; 104 | 105 | let mut map = self.all_fs.lock().unwrap(); 106 | 107 | let entry: &mut HdfsFs = map.entry(host_port.to_string()).or_insert({ 108 | let hdfs_fs: *const hdfsFS = unsafe { 109 | let hdfs_builder = hdfsNewBuilder(); 110 | match host_port { 111 | NNScheme::Local => {} //NO-OP 112 | NNScheme::Remote(ref hp) => { 113 | hdfsBuilderSetNameNode(hdfs_builder, to_raw!(&*hp.host)); 114 | hdfsBuilderSetNameNodePort(hdfs_builder, hp.port); 115 | } 116 | } 117 | info!("Connecting to NameNode ({})", &host_port.to_string()); 118 | hdfsBuilderConnect(hdfs_builder) 119 | }; 120 | 121 | if hdfs_fs.is_null() { 122 | return Err(HdfsErr::CannotConnectToNameNode(host_port.to_string())); 123 | } 124 | info!("Connected to NameNode ({})", &host_port.to_string()); 125 | HdfsFs::new(host_port.to_string(), hdfs_fs) 126 | }); 127 | 128 | Ok(entry.clone()) 129 | } 130 | } 131 | 132 | #[cfg(test)] 133 | mod test { 134 | use super::HdfsRegistry; 135 |
use crate::HdfsErr; 136 | use log::debug; 137 | 138 | #[test] 139 | fn test_hdfs_connection() -> Result<(), HdfsErr> { 140 | let port = 9000; 141 | 142 | let dfs_addr = format!("hdfs://localhost:{}", port); 143 | let fs_registry = HdfsRegistry::new(); 144 | 145 | let test_path = format!("hdfs://localhost:{}/users/test", port); 146 | debug!("Trying to get {}", &test_path); 147 | 148 | assert_eq!(dfs_addr, fs_registry.get(&test_path)?.url); 149 | 150 | // create a file, check existence, and close 151 | let fs = fs_registry.get(&test_path)?; 152 | let test_file = "/test_file"; 153 | if fs.exist(test_file) { 154 | fs.delete(test_file, true)?; 155 | } 156 | let created_file = match fs.create(test_file) { 157 | Ok(f) => f, 158 | Err(e) => panic!("Couldn't create a file {:?}", e), 159 | }; 160 | assert!(created_file.close().is_ok()); 161 | assert!(fs.exist(test_file)); 162 | 163 | // open a file and close 164 | let opened_file = fs.open(test_file)?; 165 | assert!(opened_file.close().is_ok()); 166 | 167 | match fs.mkdir("/dir1") { 168 | Ok(_) => debug!("/dir1 created"), 169 | Err(_) => panic!("Couldn't create /dir1 directory"), 170 | }; 171 | 172 | let file_info = fs.get_file_status("/dir1")?; 173 | 174 | assert_eq!("/dir1", file_info.name()); 175 | assert!(!file_info.is_file()); 176 | assert!(file_info.is_directory()); 177 | 178 | let sub_dir_num = 3; 179 | let mut expected_list = Vec::new(); 180 | for x in 0..sub_dir_num { 181 | let filename = format!("/dir1/{}", x); 182 | expected_list.push(format!("/dir1/{}", x)); 183 | 184 | match fs.mkdir(&filename) { 185 | Ok(_) => debug!("{} created", filename), 186 | Err(_) => panic!("Couldn't create {} directory", filename), 187 | }; 188 | } 189 | 190 | let mut list = fs.list_status("/dir1")?; 191 | assert_eq!(sub_dir_num, list.len()); 192 | 193 | list.sort_by(|a, b| Ord::cmp(a.name(), b.name())); 194 | 195 | for (expected, name) in expected_list 196 | .iter() 197 | .zip(list.iter().map(|status| status.name())) 198 | { 199 | assert_eq!(expected, name); 200 | } 201 | Ok(()) 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /src/raw.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
libhdfs FFI Binding APIs 19 | #![allow(non_camel_case_types)] 20 | #![allow(non_snake_case)] 21 | 22 | use libc::{c_char, c_int, c_short, c_void, size_t, time_t}; 23 | 24 | /// Opaque Pointer of hdfsFS 25 | pub enum hdfsFS {} 26 | 27 | /// Opaque Pointer of hdfsFile 28 | pub enum hdfsFile {} 29 | 30 | /// Opaque Pointer of hdfsBuilder 31 | pub enum hdfsBuilder {} 32 | 33 | /// Opaque Pointer of hadoopRzOptions 34 | pub enum hadoopRzOptions {} 35 | 36 | /// Opaque Pointer of hadoopRzBuffer 37 | pub enum hadoopRzBuffer {} 38 | 39 | /// size of data for read/write io ops 40 | pub type tSize = i32; 41 | /// time type in seconds 42 | pub type tTime = time_t; 43 | /// offset within the file 44 | pub type tOffset = i64; 45 | /// port 46 | pub type tPort = u16; 47 | 48 | #[repr(C)] 49 | pub enum tObjectKind { 50 | kObjectKindFile = 0x46, // 'F' 51 | kObjectKindDirectory = 0x44, // 'D' 52 | } 53 | 54 | /// Information about a file/directory. 55 | #[repr(C)] 56 | pub struct hdfsReadStatistics { 57 | pub totalBytesRead: u64, 58 | pub totalLocalBytesRead: u64, 59 | pub totalShortCircuitBytesRead: u64, 60 | pub totalZeroCopyBytesRead: u64, 61 | } 62 | 63 | #[repr(C)] 64 | pub struct hdfsFileInfo { 65 | /// file or directory 66 | pub mKind: tObjectKind, 67 | /// the name of the file 68 | pub mName: *const c_char, 69 | /// the last modification time for the file in seconds 70 | pub mLastMod: tTime, 71 | /// the size of the file in bytes 72 | pub mSize: tOffset, 73 | /// the count of replicas 74 | pub mReplication: c_short, 75 | /// the block size for the file 76 | pub mBlockSize: tOffset, 77 | /// the owner of the file 78 | pub mOwner: *const c_char, 79 | /// the group associated with the file 80 | pub mGroup: *const c_char, 81 | /// the permissions associated with the file 82 | pub mPermissions: c_short, 83 | /// the last access time for the file in seconds 84 | pub mLastAccess: tTime, 85 | } 86 | 87 | #[link(name = "hdfs3", kind = "dylib")] 88 | extern "C" { 89 | 90 | /// Determine if a file is open for read. 91 | /// 92 | /// #### Params 93 | /// * ```file``` - the HDFS file 94 | /// 95 | /// #### Return 96 | /// Return 1 if the file is open for read; 0 otherwise 97 | pub fn hdfsFileIsOpenForRead(fs: *const hdfsFile) -> c_int; 98 | 99 | /// Determine if a file is open for write. 100 | /// 101 | /// #### Params 102 | /// * ```file``` - the HDFS file 103 | /// 104 | /// #### Return 105 | /// Return 1 if the file is open for write; 0 otherwise. 106 | pub fn hdfsFileIsOpenForWrite(file: *const hdfsFile) -> c_int; 107 | 108 | /// Get read statistics about a file. This is only applicable to files 109 | /// opened for reading. 110 | /// 111 | /// #### Params 112 | /// * ```file``` - The HDFS file 113 | /// * ```stats``` - (out parameter) on a successful return, the read statistics. 114 | /// Unchanged otherwise. You must free the returned statistics with 115 | /// hdfsFileFreeReadStatistics. 116 | /// 117 | /// #### Return 118 | /// * 0 if the statistics were successfully returned, 119 | /// * -1 otherwise. On a failure, please check errno against 120 | /// * ENOTSUP. webhdfs, LocalFilesystem, and so forth may 121 | /// not support read statistics. 122 | pub fn hdfsFileGetReadStatistics( 123 | file: *const hdfsFile, 124 | stats: &mut *mut hdfsReadStatistics, 125 | ) -> c_int; 126 | 127 | /// HDFS read statistics for a file, 128 | /// 129 | /// #### Params 130 | /// * ```stats``` - HDFS read statistics for a file. 131 | /// 132 | /// #### Return 133 | /// Return the number of remote bytes read. 
134 | pub fn hdfsReadStatisticsGetRemoteBytesRead(stats: *const hdfsReadStatistics) -> i64; 135 | 136 | /// Free some HDFS read statistics. 137 | /// 138 | /// #### Params 139 | /// * ```stats``` - The HDFS read statistics to free. 140 | pub fn hdfsFileFreeReadStatistics(stats: *mut hdfsReadStatistics); 141 | 142 | /// Connect to an hdfs file system as a specific user. 143 | /// 144 | /// #### Params 145 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details. 146 | /// * ```port``` - The port on which the server is listening. 147 | /// * ```user``` - The user name (this is a hadoop domain user). 148 | /// Passing ```NULL``` is equivalent to hdfsConnect(host, port). 149 | /// 150 | /// #### Return 151 | /// Returns a handle to the filesystem or ```NULL``` on error. 152 | pub fn hdfsConnectAsUser(host: *const c_char, port: tPort, user: *const c_char) -> *const hdfsFS; 153 | 154 | /// Connect to an hdfs file system. 155 | /// 156 | /// This API is deprecated. Use hdfsBuilderConnect instead. 157 | /// 158 | /// #### Params 159 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details. 160 | /// * ```port``` - The port on which the server is listening. 161 | /// 162 | /// #### Return 163 | /// Returns a handle to the filesystem or ```NULL``` on error. 164 | pub fn hdfsConnect(host: *const c_char, port: tPort) -> *const hdfsFS; 165 | 166 | /// Connect to an hdfs file system. 167 | /// 168 | /// Forces a new instance to be created. This API is deprecated. 169 | /// Use hdfsBuilderConnect instead. 170 | /// 171 | /// #### Params 172 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details. 173 | /// * ```port``` - The port on which the server is listening. 174 | /// * ```user``` - The user name to use when connecting 175 | /// 176 | /// #### Return 177 | /// Returns a handle to the filesystem or ```NULL``` on error. 178 | pub fn hdfsConnectAsUserNewInstance( 179 | host: *const c_char, 180 | port: tPort, 181 | user: *const c_char, 182 | ) -> *const hdfsFS; 183 | 184 | /// Connect to an hdfs file system. 185 | /// 186 | /// Forces a new instance to be created. This API is deprecated. 187 | /// Use hdfsBuilderConnect instead. 188 | /// 189 | /// #### Params 190 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details. 191 | /// * ```port``` - The port on which the server is listening. 192 | /// 193 | /// #### Return 194 | /// Returns a handle to the filesystem or ```NULL``` on error. 195 | pub fn hdfsConnectNewInstance(host: *const c_char, port: tPort) -> *const hdfsFS; 196 | 197 | /// Connect to HDFS using the parameters defined by the builder. 198 | /// 199 | /// The HDFS builder will be freed, whether or not the connection was successful. 200 | /// 201 | /// Every successful call to hdfsBuilderConnect should be matched with a call 202 | /// to hdfsDisconnect, when the hdfsFS is no longer needed. 203 | /// 204 | /// #### Params 205 | /// * ```bld``` - The HDFS builder 206 | /// 207 | /// #### Return 208 | /// Returns a handle to the filesystem, or ```NULL``` on error. 209 | pub fn hdfsBuilderConnect(bld: *mut hdfsBuilder) -> *const hdfsFS; 210 | 211 | /// Create an HDFS builder. 212 | /// 213 | /// #### Return 214 | /// The HDFS builder, or ```NULL``` on error. 215 | pub fn hdfsNewBuilder() -> *mut hdfsBuilder; 216 | 217 | /// Force the builder to always create a new instance of the FileSystem, 218 | /// rather than possibly finding one in the cache.
219 | /// 220 | /// #### Params 221 | /// * ```bld``` - The HDFS builder 222 | pub fn hdfsBuilderSetForceNewInstance(bld: *mut hdfsBuilder); 223 | 224 | /// Set the HDFS NameNode to connect to. 225 | /// 226 | /// #### Params 227 | /// * bld - The HDFS builder 228 | /// * nn - The NameNode to use. If the string given is 'default', the default NameNode 229 | /// configuration will be used (from the XML configuration files). 230 | /// If ```NULL``` is given, a LocalFileSystem will be created. 231 | /// If the string starts with a protocol type such as ```file://``` or 232 | /// ```hdfs://```, this protocol type will be used. If not, the 233 | /// ```hdfs://``` protocol type will be used. 234 | /// You may specify a NameNode port in the usual way by 235 | /// passing a string of the format ```hdfs://:```. 236 | /// Alternately, you may set the port with hdfsBuilderSetNameNodePort. 237 | /// However, you must not pass the port in two different ways. 238 | pub fn hdfsBuilderSetNameNode(bld: *mut hdfsBuilder, host: *const c_char); 239 | 240 | /// Set the port of the HDFS NameNode to connect to. 241 | /// 242 | /// #### Params 243 | /// * bld - The HDFS builder 244 | /// * port - The port. 245 | pub fn hdfsBuilderSetNameNodePort(bld: *mut hdfsBuilder, port: u16); 246 | 247 | /// Set the username to use when connecting to the HDFS cluster. 248 | /// 249 | /// #### Params 250 | /// * bld - The HDFS builder 251 | /// * userName - The user name. The string will be shallow-copied. 252 | pub fn hdfsBuilderSetUserName(bld: *mut hdfsBuilder, userName: *const c_char); 253 | 254 | /// Set the path to the Kerberos ticket cache to use when connecting to 255 | /// the HDFS cluster. 256 | /// 257 | /// #### Params 258 | /// * ```bld``` - The HDFS builder 259 | /// * ```kerbTicketCachePath``` - The Kerberos ticket cache path. The string 260 | /// will be shallow-copied. 261 | pub fn hdfsBuilderSetKerbTicketCachePath( 262 | bld: *mut hdfsBuilder, 263 | kerbTicketCachePath: *const c_char, 264 | ); 265 | 266 | /// Free an HDFS builder. 267 | /// 268 | /// It is normally not necessary to call this function since 269 | /// hdfsBuilderConnect frees the builder. 270 | /// 271 | /// #### Params 272 | /// * ```bld``` - The HDFS builder 273 | pub fn hdfsFreeBuilder(bld: *mut hdfsBuilder); 274 | 275 | /// Set a configuration string for an HdfsBuilder. 276 | /// 277 | /// #### Params 278 | /// * ```key``` - The key to set. 279 | /// * ```val``` - The value, or ```NULL``` to set no value. 280 | /// This will be shallow-copied. You are responsible for 281 | /// ensuring that it remains valid until the builder is freed. 282 | /// 283 | /// #### Return 284 | /// 0 on success; nonzero error code otherwise. 285 | pub fn hdfsBuilderConfSetStr( 286 | bld: *mut hdfsBuilder, 287 | key: *const c_char, 288 | value: *const c_char, 289 | ) -> c_int; 290 | 291 | /// Get a configuration string. 292 | /// 293 | /// #### Params 294 | /// * ```key``` - The key to find 295 | /// * ```val``` - (out param) The value. This will be set to NULL if the 296 | /// key isn't found. You must free this string with 297 | /// ```hdfsConfStrFree```. 298 | /// 299 | /// #### Return 300 | /// 0 on success; nonzero error code otherwise. 301 | /// Failure to find the key is not an error. 302 | pub fn hdfsConfGetStr(value: *const c_char, val: *mut *mut c_char) -> c_int; 303 | 304 | /// Get a configuration integer. 305 | /// 306 | /// #### Params 307 | /// * ```key``` - The key to find 308 | /// * ```val``` - (out param) The value. 
This will NOT be changed if the 309 | /// key isn't found. 310 | /// 311 | /// #### Return 312 | /// 0 on success; nonzero error code otherwise. 313 | /// Failure to find the key is not an error. 314 | pub fn hdfsConfGetInt(key: *const c_char, val: *mut i32) -> c_int; 315 | 316 | /// Free a configuration string found with hdfsConfGetStr. 317 | /// 318 | /// #### Params 319 | /// * ```val``` - A configuration string obtained from hdfsConfGetStr 320 | pub fn hdfsConfStrFree(val: *const c_char); 321 | 322 | /// hdfsDisconnect - Disconnect from the hdfs file system. 323 | /// Disconnect from hdfs. 324 | /// 325 | /// #### Params 326 | /// * ```fs``` - The configured filesystem handle. 327 | /// 328 | /// #### Return 329 | /// Returns 0 on success, -1 on error. 330 | /// Even if there is an error, the resources associated with the 331 | /// hdfsFS will be freed. 332 | pub fn hdfsDisconnect(fs: *const hdfsFS) -> c_int; 333 | 334 | /// Open an hdfs file in the given mode. 335 | /// 336 | /// #### Params 337 | /// * ```fs``` - The configured filesystem handle. 338 | /// * ```path``` - The full path to the file. 339 | /// * ```flags``` - an ```|``` of ```bits/fcntl.h``` file flags - 340 | /// supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite 341 | /// i.e., implies O_TRUNC), O_WRONLY|O_APPEND. Other flags are generally 342 | /// ignored other than (O_RDWR || (O_EXCL & O_CREAT)) which return ```NULL``` and 343 | /// set errno equal to ENOTSUP. 344 | /// * ```bufferSize``` - Size of buffer for read/write - pass 0 if you want 345 | /// to use the default configured values. 346 | /// * ```replication``` Block replication - pass 0 if you want to use 347 | /// the default configured values. 348 | /// * ```blocksize``` - Size of block - pass 0 if you want to use the 349 | /// default configured values. 350 | /// 351 | /// #### Return 352 | /// Returns the handle to the open file, or ```NULL``` on error. 353 | /// On error, errno will be set appropriately. 354 | /// 355 | pub fn hdfsOpenFile( 356 | fs: *const hdfsFS, 357 | path: *const c_char, 358 | flags: c_int, 359 | bufferSize: c_int, 360 | replication: c_short, 361 | blocksize: i32, 362 | ) -> *const hdfsFile; 363 | 364 | /// Close an open file. 365 | /// 366 | /// #### Params 367 | /// * ```fs``` - The configured filesystem handle. 368 | /// * ```file``` - The file handle. 369 | /// 370 | /// #### Return 371 | /// Returns 0 on success, -1 on error. On error, errno will be set 372 | /// appropriately. If the hdfs file was valid, the memory associated 373 | /// with it will be freed at the end of this call, even if there was 374 | /// an I/O error. 375 | pub fn hdfsCloseFile(fs: *const hdfsFS, file: *const hdfsFile) -> c_int; 376 | 377 | /// Checks if a given path exists on the filesystem. 378 | /// 379 | /// #### Params 380 | /// * ```fs``` - The configured filesystem handle. 381 | /// * ```path``` - The path to look for 382 | /// 383 | /// #### Return 384 | /// Returns 0 on success, -1 on error. 385 | pub fn hdfsExists(fs: *const hdfsFS, path: *const c_char) -> c_int; 386 | 387 | /// Seek to given offset in file. 388 | /// 389 | /// This works only for files opened in read-only mode. 390 | /// 391 | /// #### Params 392 | /// * ```fs``` - The configured filesystem handle. 393 | /// * ```file``` - The file handle. 394 | /// * ```desiredPos``` - Offset into the file to seek into. 395 | /// 396 | /// #### Return 397 | /// Returns 0 on success, -1 on error.
398 | pub fn hdfsSeek(fs: *const hdfsFS, file: *const hdfsFile, desiredPos: tOffset) -> c_int; 399 | 400 | /// Get the current offset in the file, in bytes. 401 | /// 402 | /// #### Params 403 | /// 404 | /// ```fs``` - The configured filesystem handle. 405 | /// ```file``` - The file handle. 406 | /// 407 | /// #### Return 408 | /// Current offset, -1 on error. 409 | pub fn hdfsTell(fs: *const hdfsFS, file: *const hdfsFile) -> tOffset; 410 | 411 | /// Read data from an open file. 412 | /// 413 | /// #### Params 414 | /// * ```fs``` - The configured filesystem handle. 415 | /// * ```file``` - The file handle. 416 | /// * ```buffer``` - The buffer to copy read bytes into. 417 | /// * ```length``` - The length of the buffer. 418 | /// 419 | /// #### Return 420 | /// On success, a positive number indicating how many bytes were read. 421 | /// On end-of-file, 0. On error, -1. Errno will be set to the error code. 422 | /// Just like the POSIX read function, hdfsRead will return -1 423 | /// and set errno to EINTR if data is temporarily unavailable, 424 | /// but we are not yet at the end of the file. 425 | pub fn hdfsRead( 426 | fs: *const hdfsFS, 427 | file: *const hdfsFile, 428 | buffer: *mut c_void, 429 | length: tSize, 430 | ) -> tSize; 431 | 432 | /// Positional read of data from an open file. 433 | /// 434 | /// #### Params 435 | /// * ```fs``` - The configured filesystem handle. 436 | /// * ```file``` - The file handle. 437 | /// * ```position``` - Position from which to read 438 | /// * ```buffer``` - The buffer to copy read bytes into. 439 | /// * ```length``` - The length of the buffer. 440 | /// 441 | /// #### Return 442 | /// See hdfsRead 443 | pub fn hdfsPread( 444 | fs: *const hdfsFS, 445 | file: *const hdfsFile, 446 | position: tOffset, 447 | buffer: *mut c_void, 448 | length: tSize, 449 | ) -> tSize; 450 | 451 | /// Write data into an open file. 452 | /// 453 | /// #### Params 454 | /// * ```fs``` - The configured filesystem handle. 455 | /// * ```file``` - The file handle. 456 | /// * ```buffer``` - The data. 457 | /// * ```length``` - The no. of bytes to write. 458 | /// 459 | /// #### Return 460 | /// the number of bytes written, -1 on error. 461 | pub fn hdfsWrite( 462 | fs: *const hdfsFS, 463 | file: *const hdfsFile, 464 | buffer: *const c_void, 465 | length: tSize, 466 | ) -> tSize; 467 | 468 | /// Flush the data. 469 | /// 470 | /// #### Params 471 | /// * ```fs``` - The configured filesystem handle. 472 | /// * ```file``` - The file handle. 473 | /// 474 | /// #### Return 475 | /// Returns 0 on success, -1 on error. 476 | pub fn hdfsFlush(fs: *const hdfsFS, file: *const hdfsFile) -> c_int; 477 | 478 | /// Flush out the data in client's user buffer. After the return of this 479 | /// call, new readers will see the data. 480 | /// 481 | /// #### Params 482 | /// * ```fs``` - The configured filesystem handle. 483 | /// * ```file``` - The file handle. 484 | /// 485 | /// #### Return 486 | /// 0 on success, -1 on error and sets errno 487 | pub fn hdfsHFlush(fs: *const hdfsFS, file: *const hdfsFile) -> c_int; 488 | 489 | /// Similar to posix fsync, Flush out the data in client's 490 | /// user buffer. all the way to the disk device (but the disk may have 491 | /// it in its cache). 492 | /// 493 | /// #### Params 494 | /// * ```fs``` - The configured filesystem handle. 495 | /// * ```file``` - The file handle. 
496 | /// 497 | /// #### Return 498 | /// 0 on success, -1 on error and sets errno 499 | pub fn hdfsHSync(fs: *const hdfsFS, file: *const hdfsFile) -> c_int; 500 | 501 | /// Number of bytes that can be read from this input stream without 502 | /// blocking. 503 | /// 504 | /// #### Params 505 | /// * ```fs``` - The configured filesystem handle. 506 | /// * ```file``` - The file handle. 507 | /// 508 | /// #### Return 509 | /// 0 on success, -1 on error and sets errno 510 | pub fn hdfsAvailable(fs: *const hdfsFS, file: *const hdfsFile) -> c_int; 511 | 512 | /// Copy file from one filesystem to another. 513 | /// 514 | /// #### Params 515 | /// * ```srcFS``` - The handle to source filesystem. 516 | /// * ```src``` - The path of source file. 517 | /// * ```dstFS``` - The handle to destination filesystem. 518 | /// * ```dst``` - The path of destination file. 519 | /// 520 | /// #### Return 521 | /// Returns 0 on success, -1 on error. 522 | pub fn hdfsCopy( 523 | srcFS: *const hdfsFS, 524 | src: *const c_char, 525 | dstFS: *const hdfsFS, 526 | dst: *const c_char, 527 | ) -> c_int; 528 | 529 | /// Move file from one filesystem to another. 530 | /// 531 | /// #### Params 532 | /// * ```srcFS``` - The handle to source filesystem. 533 | /// * ```src``` - The path of source file. 534 | /// * ```dstFS``` - The handle to destination filesystem. 535 | /// * ```dst``` - The path of destination file. 536 | /// 537 | /// #### Return 538 | /// Returns 0 on success, -1 on error. 539 | pub fn hdfsMove( 540 | srcFS: *const hdfsFS, 541 | src: *const c_char, 542 | dstFS: *const hdfsFS, 543 | dst: *const c_char, 544 | ) -> c_int; 545 | 546 | /// Delete file. 547 | /// 548 | /// #### Params 549 | /// * ```fs``` - The configured filesystem handle. 550 | /// * ```path``` - The path of the file. 551 | /// * ```recursive``` - if path is a directory and set to 552 | /// non-zero, the directory is deleted else throws an exception. In 553 | /// case of a file the recursive argument is irrelevant. 554 | /// 555 | /// #### Return 556 | /// Returns 0 on success, -1 on error. 557 | pub fn hdfsDelete(fs: *const hdfsFS, path: *const c_char, recursive: c_int) -> c_int; 558 | 559 | /// Rename file. 560 | /// 561 | /// #### Params 562 | /// * ```fs``` - The configured filesystem handle. 563 | /// * ```oldPath``` - The path of the source file. 564 | /// * ```newPath``` - The path of the destination file. 565 | /// 566 | /// #### Return 567 | /// Returns 0 on success, -1 on error. 568 | pub fn hdfsRename(fs: *const hdfsFS, oldPath: *const c_char, newPath: *const c_char) -> c_int; 569 | 570 | /// Get the current working directory for the given filesystem. 571 | /// 572 | /// #### Params 573 | /// * ```fs``` - The configured filesystem handle. 574 | /// * ```buffer``` - The user-buffer to copy path of cwd into. 575 | /// * ```bufferSize``` - The length of user-buffer. 576 | /// 577 | /// #### Return 578 | /// Returns buffer, ```NULL``` on error. 579 | pub fn hdfsGetWorkingDirectory( 580 | fs: *const hdfsFS, 581 | buffer: *mut c_char, 582 | bufferSize: size_t, 583 | ) -> *mut c_char; 584 | 585 | /// Set the working directory. All relative paths will be resolved relative 586 | /// to it. 587 | /// 588 | /// #### Params 589 | /// * ```fs``` - The configured filesystem handle. 590 | /// * ```path``` - The path of the new 'cwd'. 591 | /// 592 | /// #### Return 593 | /// Returns 0 on success, -1 on error. 
594 | pub fn hdfsSetWorkingDirectory(fs: *const hdfsFS, path: *const c_char) -> c_int; 595 | 596 | /// Make the given file and all non-existent parents into directories. 597 | /// 598 | /// #### Params 599 | /// * ```fs``` - The configured filesystem handle. 600 | /// * ```path``` - The path of the directory. 601 | /// 602 | /// #### Return 603 | /// Returns 0 on success, -1 on error. 604 | pub fn hdfsCreateDirectory(fs: *const hdfsFS, path: *const c_char) -> c_int; 605 | 606 | /// Set the replication of the specified file to the supplied value 607 | /// 608 | /// #### Params 609 | /// * ```fs``` The configured filesystem handle. 610 | /// * ```path``` The path of the directory. 611 | /// 612 | /// #### Return 613 | /// Returns 0 on success, -1 on error. 614 | pub fn hdfsSetReplication(fs: *const hdfsFS, path: *const c_char, replication: i16) -> c_int; 615 | 616 | /// Get list of files/directories for a given directory-path. 617 | /// hdfsFreeFileInfo should be called to deallocate memory. 618 | /// 619 | /// #### Params 620 | /// * ```fs``` - The configured filesystem handle. 621 | /// * ```path``` - The path of the directory. 622 | /// * ```numEntries``` - Set to the number of files/directories in path. 623 | /// 624 | /// #### Return 625 | /// Returns a dynamically-allocated array of hdfsFileInfo objects; ```NULL``` on 626 | /// error. 627 | pub fn hdfsListDirectory( 628 | fs: *const hdfsFS, 629 | path: *const c_char, 630 | numEntries: *mut c_int, 631 | ) -> *const hdfsFileInfo; 632 | 633 | /// Get information about a path as a (dynamically allocated) single 634 | /// hdfsFileInfo struct. hdfsFreeFileInfo should be called when the 635 | /// pointer is no longer needed. 636 | /// 637 | /// #### Params 638 | /// * ```fs``` - The configured filesystem handle. 639 | /// * ```path``` The path of the file. 640 | /// 641 | /// #### Params 642 | /// Returns a dynamically-allocated hdfsFileInfo object; ```NULL``` on error. 643 | pub fn hdfsGetPathInfo(fs: *const hdfsFS, path: *const c_char) -> *const hdfsFileInfo; 644 | 645 | /// Free up the hdfsFileInfo array (including fields) 646 | /// 647 | /// #### Params 648 | /// * ```hdfsFileInfo``` The array of dynamically-allocated hdfsFileInfo objects. 649 | /// * ```numEntries``` The size of the array. 650 | pub fn hdfsFreeFileInfo(hdfsFileInfo: *const hdfsFileInfo, numEntries: c_int); 651 | 652 | /// hdfsFileIsEncrypted: determine if a file is encrypted based on its 653 | /// hdfsFileInfo. 654 | /// 655 | /// #### Return 656 | /// -1 if there was an error (errno will be set), 0 if the file is 657 | /// not encrypted, 1 if the file is encrypted. 658 | pub fn hdfsFileIsEncrypted(hdfsFileInfo: *const hdfsFileInfo) -> c_int; 659 | 660 | /// Get hostnames where a particular block (determined by pos & blocksize) 661 | /// of a file is stored. The last element in the array is ```NULL```. 662 | /// Due to replication, a single block could be present on multiple hosts. 663 | /// 664 | /// #### Params 665 | /// * ```fs``` The configured filesystem handle. 666 | /// * ```path``` - The path of the file. 667 | /// * ```start``` - The start of the block. 668 | /// * ```length``` - The length of the block. 669 | /// 670 | /// #### Return 671 | /// Returns a dynamically-allocated 2-d array of blocks-hosts; ```NULL``` 672 | /// on error. 
673 | pub fn hdfsGetHosts( 674 | fs: *const hdfsFS, 675 | path: *const c_char, 676 | start: tOffset, 677 | length: tOffset, 678 | ) -> *const *const *const c_char; 679 | 680 | /// Free up the structure returned by hdfsGetHosts 681 | /// 682 | /// #### Params 683 | /// * ```hdfsFileInfo``` - The array of dynamically-allocated 684 | /// hdfsFileInfo objects. 685 | /// * ```numEntries``` - The size of the array. 686 | pub fn hdfsFreeHosts(blockHosts: *const *const *const c_char); 687 | 688 | /// Get the default blocksize. 689 | /// 690 | /// This API is deprecated. Use hdfsGetDefaultBlockSizeAtPath instead. 691 | /// 692 | /// #### Params 693 | /// * ```fs``` - The configured filesystem handle. 694 | /// 695 | /// #### Return 696 | /// Returns the default blocksize, or -1 on error. 697 | pub fn hdfsGetDefaultBlockSize(fs: *const hdfsFS) -> tOffset; 698 | 699 | /// Get the default blocksize at the filesystem indicated by a given path. 700 | /// 701 | /// #### Params 702 | /// * ```fs``` - The configured filesystem handle. 703 | /// * ```path``` - The given path will be used to locate the actual 704 | /// filesystem. The full path does not have to exist. 705 | /// 706 | /// #### Return 707 | /// Returns the default blocksize, or -1 on error. 708 | pub fn hdfsGetDefaultBlockSizeAtPath(fs: *const hdfsFS, path: *const c_char) -> tOffset; 709 | 710 | /// Return the raw capacity of the filesystem. 711 | /// 712 | /// #### Params 713 | /// * ```fs``` - The configured filesystem handle. 714 | /// 715 | /// #### Return 716 | /// Returns the raw-capacity; -1 on error. 717 | pub fn hdfsGetCapacity(fs: *const hdfsFS) -> tOffset; 718 | 719 | /// Return the total raw size of all files in the filesystem. 720 | /// 721 | /// #### Params 722 | /// * ```fs``` - The configured filesystem handle. 723 | /// #### Return 724 | /// Returns the total-size; -1 on error. 725 | pub fn hdfsGetUsed(fs: *const hdfsFS) -> tOffset; 726 | 727 | /// Change the user and/or group of a file or directory. 728 | /// 729 | /// #### Params 730 | /// * ```fs``` - The configured filesystem handle. 731 | /// * ```path``` - the path to the file or directory 732 | /// * ```owner``` - User string. Set to ```NULL``` for 'no change' 733 | /// * ```group``` - Group string. Set to ```NULL``` for 'no change' 734 | /// 735 | /// #### Return 736 | /// 0 on success else -1 737 | pub fn hdfsChown( 738 | fs: *const hdfsFS, 739 | path: *const c_char, 740 | owner: *const c_char, 741 | group: *const c_char, 742 | ) -> c_int; 743 | 744 | /// hdfsChmod 745 | /// 746 | /// #### Params 747 | /// * ```fs``` - The configured filesystem handle. 748 | /// * ```path``` - the path to the file or directory 749 | /// 750 | /// #### Return 751 | /// 0 on success else -1 752 | pub fn hdfsChmod(fs: *const hdfsFS, path: *const c_char, mode: c_short) -> c_int; 753 | 754 | /// hdfsUtime 755 | /// 756 | /// #### Params 757 | /// * ```fs``` - The configured filesystem handle. 758 | /// * ```path``` - the path to the file or directory 759 | /// * ```mtime``` - new modification time or -1 for no change 760 | /// * ```atime``` - new access time or -1 for no change 761 | /// 762 | /// #### Return 763 | /// 0 on success else -1 764 | pub fn hdfsUtime(fs: *const hdfsFS, path: *const c_char, mtime: tTime, atime: tTime) -> c_int; 765 | 766 | /// Allocate a zero-copy options structure. 767 | /// 768 | /// You must free all options structures allocated with this function using 769 | /// hadoopRzOptionsFree. 
770 | /// 771 | /// #### Return 772 | /// A zero-copy options structure, or ```NULL``` if one could not be allocated. 773 | /// If ```NULL``` is returned, errno will contain the error number. 774 | pub fn hadoopRzOptionsAlloc() -> *const hadoopRzOptions; 775 | 776 | /// Determine whether we should skip checksums in read0. 777 | /// 778 | /// #### Params 779 | /// * ```opts``` - The options structure. 780 | /// * ```skip``` - Nonzero to skip checksums sometimes; zero to always 781 | /// check them. 782 | /// 783 | /// #### Return 784 | /// 0 on success; -1 plus errno on failure. 785 | pub fn hadoopRzOptionsSetSkipChecksum(opts: *const hadoopRzOptions, skip: c_int) -> c_int; 786 | 787 | /// Set the ByteBufferPool to use with read0. 788 | /// 789 | /// #### Params 790 | /// * ```opts``` - The options structure. 791 | /// * ```className``` - If this is ```NULL```, we will not use any 792 | /// ByteBufferPool. If this is non-NULL, it will be 793 | /// treated as the name of the pool class to use. 794 | /// For example, you can use ELASTIC_BYTE_BUFFER_POOL_CLASS. 795 | /// 796 | /// #### Return 797 | /// 0 if the ByteBufferPool class was found and instantiated; 798 | /// -1 plus errno otherwise. 799 | pub fn hadoopRzOptionsSetByteBufferPool( 800 | opts: *const hadoopRzOptions, 801 | className: *const c_char, 802 | ) -> c_int; 803 | 804 | /// Free a hadoopRzOptionsFree structure. 805 | /// 806 | /// #### Params 807 | /// * ```opts``` - The options structure to free. 808 | /// Any associated ByteBufferPool will also be freed. 809 | pub fn hadoopRzOptionsFree(opts: *const hadoopRzOptions); 810 | 811 | /// Perform a byte buffer read. If possible, this will be a zero-copy 812 | /// (mmap) read. 813 | /// 814 | /// #### Params 815 | /// * ```file``` - The file to read from. 816 | /// * ```opts``` - An options structure created by hadoopRzOptionsAlloc. 817 | /// * ```maxLength``` - The maximum length to read. We may read fewer bytes 818 | /// than this length. 819 | /// 820 | /// #### Return 821 | /// On success, we will return a new hadoopRzBuffer. This buffer will 822 | /// continue to be valid and readable until it is released by 823 | /// readZeroBufferFree. Failure to release a buffer will lead to a memory 824 | /// leak. You can access the data within the hadoopRzBuffer with 825 | /// hadoopRzBufferGet. If you have reached EOF, the data within the 826 | /// hadoopRzBuffer will be ```NULL```. You must still free hadoopRzBuffer 827 | /// instances containing ```NULL```. 828 | /// 829 | /// On failure, we will return ```NULL``` plus an errno code. 830 | /// ```errno = EOPNOTSUPP``` indicates that we could not do a zero-copy 831 | /// read, and there was no ByteBufferPool supplied. 832 | pub fn hadoopReadZero( 833 | file: *const hdfsFile, 834 | opts: *const hadoopRzOptions, 835 | maxLength: i32, 836 | ) -> *const hadoopRzBuffer; 837 | 838 | /// Determine the length of the buffer returned from readZero. 839 | /// 840 | /// #### Params 841 | /// * ```buffer``` - a buffer returned from readZero. 842 | /// 843 | /// #### Return 844 | /// the length of the buffer. 845 | pub fn hadoopRzBufferLength(buffer: *const hadoopRzBuffer) -> i32; 846 | 847 | /// Get a pointer to the raw buffer returned from readZero. 848 | /// 849 | /// #### Params 850 | /// * ```buffer``` - a buffer returned from readZero. 851 | /// 852 | /// #### Return 853 | /// a pointer to the start of the buffer. This will be ```NULL``` when 854 | /// end-of-file has been reached. 
855 | pub fn hadoopRzBufferGet(buffer: *const hadoopRzBuffer) -> *const c_void; 856 | 857 | /// Release a buffer obtained through readZero. 858 | /// 859 | /// #### Params 860 | /// * ```file``` - The hdfs stream that created this buffer. This must be 861 | /// the same stream you called hadoopReadZero on. 862 | /// 863 | /// #### Return 864 | /// The buffer to release. 865 | pub fn hadoopRzBufferFree(file: *const hdfsFile, buffer: *const hadoopRzBuffer); 866 | } 867 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::str; 19 | 20 | use crate::dfs::HdfsFs; 21 | use crate::err::HdfsErr; 22 | use crate::raw::*; 23 | 24 | #[macro_export] 25 | macro_rules! to_raw { 26 | ($str:expr) => {{ 27 | let c_str = std::ffi::CString::new($str).unwrap(); 28 | c_str.into_raw() 29 | }}; 30 | } 31 | 32 | #[macro_export] 33 | macro_rules! from_raw { 34 | ($chars:expr) => {{ 35 | let slice = unsafe { std::ffi::CStr::from_ptr($chars) }.to_bytes(); 36 | std::str::from_utf8(slice).unwrap() 37 | }}; 38 | } 39 | 40 | // pub fn chars_to_str<'a>(chars: *const c_char) -> &'a str { 41 | // let slice = unsafe { CStr::from_ptr(chars) }.to_bytes(); 42 | // str::from_utf8(slice).unwrap() 43 | // } 44 | 45 | #[macro_export] 46 | macro_rules! b2i { 47 | ($b:expr) => {{ 48 | if $b { 49 | 1 50 | } else { 51 | 0 52 | } 53 | }}; 54 | } 55 | 56 | /// Hdfs Utility 57 | pub struct HdfsUtil; 58 | 59 | /// HDFS Utility 60 | impl HdfsUtil { 61 | /// Copy file from one filesystem to another. 62 | /// 63 | /// #### Params 64 | /// * ```srcFS``` - The handle to source filesystem. 65 | /// * ```src``` - The path of source file. 66 | /// * ```dstFS``` - The handle to destination filesystem. 67 | /// * ```dst``` - The path of destination file. 68 | pub fn copy(src_fs: &HdfsFs, src: &str, dst_fs: &HdfsFs, dst: &str) -> Result { 69 | let res = unsafe { hdfsCopy(src_fs.raw(), to_raw!(src), dst_fs.raw(), to_raw!(dst)) }; 70 | 71 | if res == 0 { 72 | Ok(true) 73 | } else { 74 | Err(HdfsErr::Unknown) 75 | } 76 | } 77 | 78 | /// Move file from one filesystem to another. 79 | /// 80 | /// #### Params 81 | /// * ```srcFS``` - The handle to source filesystem. 82 | /// * ```src``` - The path of source file. 83 | /// * ```dstFS``` - The handle to destination filesystem. 84 | /// * ```dst``` - The path of destination file. 
85 | pub fn mv(src_fs: &HdfsFs, src: &str, dst_fs: &HdfsFs, dst: &str) -> Result { 86 | let res = unsafe { hdfsMove(src_fs.raw(), to_raw!(src), dst_fs.raw(), to_raw!(dst)) }; 87 | 88 | if res == 0 { 89 | Ok(true) 90 | } else { 91 | Err(HdfsErr::Unknown) 92 | } 93 | } 94 | } 95 | --------------------------------------------------------------------------------
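A minimal end-to-end sketch of the high-level API, assembled only from calls that already appear in the test modules above (HdfsRegistry::get, exist, create, open, mkdir, list_status). The NameNode address and the paths are placeholders chosen for illustration; treat this as a sketch of how the pieces fit together rather than a tested program.

use hdfs_native::{HdfsErr, HdfsRegistry};

fn hdfs_roundtrip() -> Result<(), HdfsErr> {
    // Assumes a NameNode reachable at localhost:9000, as in test_hdfs_connection.
    let registry = HdfsRegistry::new();
    let fs = registry.get("hdfs://localhost:9000/")?;

    // Create an empty file, confirm it is visible, then reopen and close it.
    let created = fs.create("/example_file")?;
    assert!(created.close().is_ok());
    assert!(fs.exist("/example_file"));
    let reopened = fs.open("/example_file")?;
    assert!(reopened.close().is_ok());

    // Directories and listings follow the same pattern as the test above.
    fs.mkdir("/example_dir")?;
    for status in fs.list_status("/")? {
        println!("{} (directory: {})", status.name(), status.is_directory());
    }
    Ok(())
}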
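For completeness, here is the same connection flow spelled out against the raw bindings, mirroring what HdfsRegistry::get does internally with hdfsNewBuilder, hdfsBuilderSetNameNode, hdfsBuilderSetNameNodePort and hdfsBuilderConnect. It exists only to make the builder lifecycle documented in src/raw.rs concrete; the host, port and path are placeholders, everything here is unsafe FFI, and real code should prefer the safe HdfsRegistry/HdfsFs layer.

use hdfs_native::raw::{
    hdfsBuilderConnect, hdfsBuilderSetNameNode, hdfsBuilderSetNameNodePort, hdfsDisconnect,
    hdfsExists, hdfsNewBuilder,
};
use hdfs_native::to_raw;

fn raw_builder_flow() {
    unsafe {
        // hdfsBuilderConnect frees the builder whether or not the connection
        // succeeds, so the builder must not be reused afterwards.
        let builder = hdfsNewBuilder();
        hdfsBuilderSetNameNode(builder, to_raw!("hdfs://localhost"));
        hdfsBuilderSetNameNodePort(builder, 9000);
        let fs = hdfsBuilderConnect(builder);
        assert!(!fs.is_null(), "could not connect to the NameNode");

        // hdfsExists returns 0 when the path exists, -1 otherwise.
        let exists = hdfsExists(fs, to_raw!("/users/test")) == 0;
        println!("/users/test exists: {}", exists);

        // Every successful hdfsBuilderConnect should be paired with hdfsDisconnect.
        hdfsDisconnect(fs);
    }
}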