├── .gitignore
├── Cargo.toml
├── LICENSE.txt
├── README.md
├── dev
│   ├── _test.sh
│   ├── build-set-env.sh
│   └── docker
│       ├── hdfs-native.dockerfile
│       └── libhdfs3.dockerfile
├── header
├── libhdfs3-hdfs-client.xml
└── src
    ├── dfs.rs
    ├── err.rs
    ├── hdfs_store.rs
    ├── lib.rs
    ├── raw.rs
    └── util.rs
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | .idea
4 | .DS_Store
5 | .docker
6 | .vscode
7 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | name = "hdfs-native"
20 | version = "0.1.0"
21 | edition = "2018"
22 |
23 | [lib]
24 | name = "hdfs_native"
25 | path = "src/lib.rs"
26 |
27 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
28 |
29 | [dependencies]
30 | libc = "0.2"
31 | log = "0.4"
32 | url = "2"
33 | thiserror = "1"
34 |
35 | async-trait = "0.1.41"
36 | chrono = "0.4"
37 | datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "master" }
38 | futures = "0.3"
39 | tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
40 | tokio-stream = "0.1"
41 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | <!--
3 | Licensed to the Apache Software Foundation (ASF) under one
4 | or more contributor license agreements. See the NOTICE file
5 | distributed with this work for additional information
6 | regarding copyright ownership. The ASF licenses this file
7 | to you under the Apache License, Version 2.0 (the
8 | "License"); you may not use this file except in compliance
9 | with the License. You may obtain a copy of the License at
10 |
11 | http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | Unless required by applicable law or agreed to in writing, software
14 | distributed under the License is distributed on an "AS IS" BASIS,
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | See the License for the specific language governing permissions and
17 | limitations under the License.
18 | -->
19 |
20 | # DataFusion-hdfs-native
21 |
22 | Connect DataFusion to HDFS through the native HDFS client library (libhdfs3).
23 |
24 | ## Setup libhdfs3
25 |
26 | 1. Install libhdfs3
27 |
28 | You can either install it via [Conda](https://docs.conda.io/en/latest/)
29 |
30 | ```shell
31 | conda install -c conda-forge libhdfs3
32 | ```
33 |
34 | or build it from source
35 |
36 | ```shell
37 |
38 | # A specific revision that builds on macOS and targets HDFS 2.6.x
39 | git clone https://github.com/ClickHouse-Extras/libhdfs3.git
40 | cd libhdfs3
41 | git checkout 24b058c356794ef6cc2d31323dc9adf0386652ff
42 |
43 | # then build it
44 | mkdir build && cd build
45 | ../bootstrap --prefix=/usr/local
46 | make
47 | make install
48 | ```
49 |
50 |
51 | ## Configuration
52 |
53 | ```shell
54 | # Client config to use: set LIBHDFS3_CONF, or place hdfs-client.xml in the working directory
55 | export LIBHDFS3_CONF=/path/to/libhdfs3-hdfs-client.xml
56 | ```
57 |
--------------------------------------------------------------------------------
/dev/_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # TODO enable debug with docker
21 |
22 | set -e
23 |
24 | . ./dev/build-set-env.sh
25 |
26 | # Use --progress=plain for detailed, non-scrolling docker output
27 |
28 | docker build -t hdfs-native:$HN_VERSION -f dev/docker/hdfs-native.dockerfile .
29 |
--------------------------------------------------------------------------------
/dev/build-set-env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | export HN_VERSION=$(awk -F'[ ="]+' '$1 == "version" { print $2 }' Cargo.toml)
21 |
--------------------------------------------------------------------------------
/dev/docker/hdfs-native.dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Turn .dockerignore to .dockerallow by excluding everything and explicitly
19 | # allowing specific files and directories. This enables us to quickly add
20 | # dependency files to the docker content without scanning the whole directory.
21 | # This setup requires all of our docker containers to have the project's source
22 | # available as a mounted directory.
23 |
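(The `.dockerignore` allow-list described in the comment above is not part of this listing; a minimal sketch of what it could look like, with illustrative entries only, is:

```
# Ignore everything...
*
# ...then explicitly allow what the build needs.
!Cargo.toml
!src/
!dev/
```
)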
24 | #ARG RELEASE_FLAG=--release
25 | FROM yijieshen/hdfs26:0.2.0 AS base
26 | WORKDIR /tmp/hdfs-jni
27 |
28 | FROM base as planner
29 | RUN mkdir /tmp/hdfs-jni/src
30 | ADD Cargo.toml .
31 | COPY src ./src/
32 | RUN cargo chef prepare --recipe-path recipe.json
33 |
34 | FROM base as cacher
35 | COPY --from=planner /tmp/hdfs-jni/recipe.json recipe.json
36 | RUN cargo chef cook $RELEASE_FLAG --recipe-path recipe.json
37 |
38 | FROM base as builder
39 | RUN mkdir /tmp/hdfs-jni/src
40 | ADD Cargo.toml .
41 | ADD build.rs .
42 | COPY src ./src/
43 | COPY --from=cacher /tmp/hdfs-jni/target target
44 |
45 | #ARG RELEASE_FLAG=--release
46 |
47 | ENV LD_LIBRARY_PATH /usr/local/hadoop/lib/native:/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server
48 | ENV LIBRARY_PATH /usr/local/hadoop/lib/native:/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server
49 |
50 | ENV RUST_LOG=info
51 | ENV RUST_BACKTRACE=full
52 |
53 | # force build.rs to run to generate configure_me code.
54 | ENV FORCE_REBUILD='true'
55 | RUN export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob) && RUST_LOG=info cargo test -vv
56 |
--------------------------------------------------------------------------------
/dev/docker/libhdfs3.dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # TODO adapt https://github.com/wesm/arrow-io-test to install libhdfs3
19 |
--------------------------------------------------------------------------------
/header:
--------------------------------------------------------------------------------
1 | Licensed to the Apache Software Foundation (ASF) under one
2 | or more contributor license agreements. See the NOTICE file
3 | distributed with this work for additional information
4 | regarding copyright ownership. The ASF licenses this file
5 | to you under the Apache License, Version 2.0 (the
6 | "License"); you may not use this file except in compliance
7 | with the License. You may obtain a copy of the License at
8 |
9 | http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 |
17 |
--------------------------------------------------------------------------------
/libhdfs3-hdfs-client.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
 53 |
 54 | <configuration>
 55 |   <property>
 56 |     <name>hadoop.security.authentication</name>
 57 |     <value>simple</value>
 58 |     <description>
 59 |       the RPC authentication method, valid values include "simple" or "kerberos". default is "simple"
 60 |     </description>
 61 |   </property>
 62 |
 63 |   <property>
 64 |     <name>rpc.client.timeout</name>
 65 |     <value>3600000</value>
 66 |     <description>
 67 |       timeout interval of a RPC invocation in millisecond. default is 3600000.
 68 |     </description>
 69 |   </property>
 70 |
 71 |   <property>
 72 |     <name>rpc.client.connect.tcpnodelay</name>
 73 |     <value>true</value>
 74 |     <description>
 75 |       whether set socket TCP_NODELAY to true when connect to RPC server. default is true.
 76 |     </description>
 77 |   </property>
 78 |
 79 |   <property>
 80 |     <name>rpc.client.max.idle</name>
 81 |     <value>10000</value>
 82 |     <description>
 83 |       the max idle time of a RPC connection in millisecond. default is 10000.
 84 |     </description>
 85 |   </property>
 86 |
 87 |   <property>
 88 |     <name>rpc.client.ping.interval</name>
 89 |     <value>10000</value>
 90 |     <description>
 91 |       the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000.
 92 |     </description>
 93 |   </property>
 94 |
 95 |   <property>
 96 |     <name>rpc.client.connect.timeout</name>
 97 |     <value>600000</value>
 98 |     <description>
 99 |       the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000.
100 |     </description>
101 |   </property>
102 |
103 |   <property>
104 |     <name>rpc.client.connect.retry</name>
105 |     <value>10</value>
106 |     <description>
107 |       the max retry times if the RPC client fail to setup the connection to server. default is 10.
108 |     </description>
109 |   </property>
110 |
111 |   <property>
112 |     <name>rpc.client.read.timeout</name>
113 |     <value>3600000</value>
114 |     <description>
115 |       the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000.
116 |     </description>
117 |   </property>
118 |
119 |   <property>
120 |     <name>rpc.client.write.timeout</name>
121 |     <value>3600000</value>
122 |     <description>
123 |       the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000.
124 |     </description>
125 |   </property>
126 |
127 |   <property>
128 |     <name>rpc.client.socket.linger.timeout</name>
129 |     <value>-1</value>
130 |     <description>
131 |       set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1.
132 |     </description>
133 |   </property>
134 |
135 |
136 |   <property>
137 |     <name>dfs.client.read.shortcircuit</name>
138 |     <value>false</value>
139 |     <description>
140 |       whether reading block file bypass datanode if the block and the client are
141 |       on the same node. default is true.
142 |     </description>
143 |   </property>
144 |
145 |   <property>
146 |     <name>dfs.default.replica</name>
147 |     <value>1</value>
148 |     <description>
149 |       the default number of replica. default is 3.
150 |     </description>
151 |   </property>
152 |
153 |   <property>
154 |     <name>dfs.prefetchsize</name>
155 |     <value>10</value>
156 |     <description>
157 |       the default number of blocks which information will be prefetched. default is 10.
158 |     </description>
159 |   </property>
160 |
161 |   <property>
162 |     <name>dfs.client.failover.max.attempts</name>
163 |     <value>15</value>
164 |     <description>
165 |       if multiply namenodes are configured, it is the max retry times when the dfs client try to issue a RPC call. default is 15.
166 |     </description>
167 |   </property>
168 |
169 |   <property>
170 |     <name>dfs.default.blocksize</name>
171 |     <value>67108864</value>
172 |     <description>
173 |       default block size. default is 67108864.
174 |     </description>
175 |   </property>
176 |
177 |   <property>
178 |     <name>dfs.client.log.severity</name>
179 |     <value>INFO</value>
180 |     <description>
181 |       the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO.
182 |     </description>
183 |   </property>
184 |
185 |
186 |   <property>
187 |     <name>input.connect.timeout</name>
188 |     <value>600000</value>
189 |     <description>
190 |       the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000.
191 |     </description>
192 |   </property>
193 |
194 |   <property>
195 |     <name>input.read.timeout</name>
196 |     <value>3600000</value>
197 |     <description>
198 |       the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000.
199 |     </description>
200 |   </property>
201 |
202 |   <property>
203 |     <name>input.write.timeout</name>
204 |     <value>3600000</value>
205 |     <description>
206 |       the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000.
207 |     </description>
208 |   </property>
209 |
210 |   <property>
211 |     <name>input.localread.default.buffersize</name>
212 |     <value>1048576</value>
213 |     <description>
214 |       number of bytes of the buffer which is used to hold the data from block file and verify checksum.
215 |       it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576.
216 |     </description>
217 |   </property>
218 |
219 |   <property>
220 |     <name>input.localread.blockinfo.cachesize</name>
221 |     <value>1000</value>
222 |     <description>
223 |       the size of block file path information cache. default is 1000.
224 |     </description>
225 |   </property>
226 |
227 |   <property>
228 |     <name>input.read.getblockinfo.retry</name>
229 |     <value>3</value>
230 |     <description>
231 |       the max retry times when the client fail to get block information from namenode. default is 3.
232 |     </description>
233 |   </property>
234 |
235 |
236 |   <property>
237 |     <name>output.replace-datanode-on-failure</name>
238 |     <value>false</value>
239 |     <description>
240 |       whether the client add new datanode into pipeline if the number of nodes in
241 |       pipeline is less the specified number of replicas. default is true.
242 |     </description>
243 |   </property>
244 |
245 |   <property>
246 |     <name>output.default.chunksize</name>
247 |     <value>512</value>
248 |     <description>
249 |       the number of bytes of a chunk in pipeline. default is 512.
250 |     </description>
251 |   </property>
252 |
253 |   <property>
254 |     <name>output.default.packetsize</name>
255 |     <value>65536</value>
256 |     <description>
257 |       the number of bytes of a packet in pipeline. default is 65536.
258 |     </description>
259 |   </property>
260 |
261 |   <property>
262 |     <name>output.default.write.retry</name>
263 |     <value>10</value>
264 |     <description>
265 |       the max retry times when the client fail to setup the pipeline. default is 10.
266 |     </description>
267 |   </property>
268 |
269 |   <property>
270 |     <name>output.connect.timeout</name>
271 |     <value>600000</value>
272 |     <description>
273 |       the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000.
274 |     </description>
275 |   </property>
276 |
277 |   <property>
278 |     <name>output.read.timeout</name>
279 |     <value>3600000</value>
280 |     <description>
281 |       the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000.
282 |     </description>
283 |   </property>
284 |
285 |   <property>
286 |     <name>output.write.timeout</name>
287 |     <value>3600000</value>
288 |     <description>
289 |       the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000.
290 |     </description>
291 |   </property>
292 |
293 |   <property>
294 |     <name>output.packetpool.size</name>
295 |     <value>1024</value>
296 |     <description>
297 |       the max number of packets in a file's packet pool. default is 1024.
298 |     </description>
299 |   </property>
300 |
301 |   <property>
302 |     <name>output.close.timeout</name>
303 |     <value>900000</value>
304 |     <description>
305 |       the timeout interval in millisecond when close an output stream. default is 900000.
306 |     </description>
307 |   </property>
308 |
309 | </configuration>
--------------------------------------------------------------------------------
/src/dfs.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::marker::PhantomData;
19 | use std::mem;
20 | use std::slice;
21 | use std::string::String;
22 | use std::sync::Arc;
23 |
24 | use libc::{c_char, c_int, c_short, c_void, time_t};
25 |
26 | use crate::err::HdfsErr;
27 | use crate::raw::*;
28 | use crate::{b2i, from_raw, to_raw};
29 | use std::cmp::min;
30 | use std::fmt::{Debug, Formatter};
31 |
32 | const O_RDONLY: c_int = 0;
33 | const O_WRONLY: c_int = 1;
34 | const O_APPEND: c_int = 1024;
35 |
36 | /// Options for zero-copy read
37 | pub struct RzOptions {
38 | ptr: *const hadoopRzOptions,
39 | }
40 |
41 | impl Drop for RzOptions {
42 | fn drop(&mut self) {
43 | unsafe { hadoopRzOptionsFree(self.ptr) }
44 | }
45 | }
46 |
47 | impl Default for RzOptions {
48 | fn default() -> Self {
49 | RzOptions::new()
50 | }
51 | }
52 |
53 | impl RzOptions {
54 | pub fn new() -> RzOptions {
55 | RzOptions {
56 | ptr: unsafe { hadoopRzOptionsAlloc() },
57 | }
58 | }
59 |
60 | pub fn skip_checksum(&self, skip: bool) -> Result<bool, HdfsErr> {
61 | let res = unsafe { hadoopRzOptionsSetSkipChecksum(self.ptr, b2i!(skip)) };
62 |
63 | if res == 0 {
64 | Ok(true)
65 | } else {
66 | Err(HdfsErr::Unknown)
67 | }
68 | }
69 |
70 | pub fn set_bytebuffer_pool(&self, class_name: &str) -> Result<bool, HdfsErr> {
71 | let res = unsafe { hadoopRzOptionsSetByteBufferPool(self.ptr, to_raw!(class_name)) };
72 |
73 | if res == 0 {
74 | Ok(true)
75 | } else {
76 | Err(HdfsErr::Unknown)
77 | }
78 | }
79 | }
80 |
81 | /// A buffer returned from zero-copy read.
82 | /// This buffer will be automatically freed when its lifetime is finished.
83 | pub struct RzBuffer<'a> {
84 | file: &'a HdfsFile<'a>,
85 | ptr: *const hadoopRzBuffer,
86 | }
87 |
88 | impl<'a> Drop for RzBuffer<'a> {
89 | fn drop(&mut self) {
90 | unsafe { hadoopRzBufferFree(self.file.file, self.ptr) }
91 | }
92 | }
93 |
94 | impl<'a> RzBuffer<'a> {
95 | /// Get the length of a raw buffer returned from zero-copy read.
96 | #[allow(clippy::len_without_is_empty)]
97 | pub fn len(&self) -> i32 {
98 | (unsafe { hadoopRzBufferLength(self.ptr) }) as i32
99 | }
100 |
101 | /// Get a pointer to the raw buffer returned from zero-copy read.
102 | pub fn as_ptr(&self) -> Result<*const u8, HdfsErr> {
103 | let ptr = unsafe { hadoopRzBufferGet(self.ptr) };
104 |
105 | if !ptr.is_null() {
106 | Ok(ptr as *const u8)
107 | } else {
108 | Err(HdfsErr::Unknown)
109 | }
110 | }
111 |
112 | /// Get a Slice transformed from a raw buffer
113 | pub fn as_slice(&'a self) -> Result<&[u8], HdfsErr> {
114 | let ptr = unsafe { hadoopRzBufferGet(self.ptr) as *const u8 };
115 |
116 | let len = unsafe { hadoopRzBufferLength(self.ptr) as usize };
117 |
118 | if !ptr.is_null() {
119 | Ok(unsafe { mem::transmute(slice::from_raw_parts(ptr, len as usize)) })
120 | } else {
121 | Err(HdfsErr::Unknown)
122 | }
123 | }
124 | }
125 |
126 | /// Includes hostnames where a particular block of a file is stored.
127 | pub struct BlockHosts {
128 | ptr: *const *const *const c_char,
129 | }
130 |
131 | impl Drop for BlockHosts {
132 | fn drop(&mut self) {
133 | unsafe { hdfsFreeHosts(self.ptr) };
134 | }
135 | }
136 |
137 | struct HdfsFileInfoPtr {
138 | pub ptr: *const hdfsFileInfo,
139 | pub len: i32,
140 | }
141 |
142 | impl Drop for HdfsFileInfoPtr {
143 | fn drop(&mut self) {
144 | unsafe { hdfsFreeFileInfo(self.ptr, self.len) };
145 | }
146 | }
147 |
148 | impl HdfsFileInfoPtr {
149 | fn new(ptr: *const hdfsFileInfo) -> HdfsFileInfoPtr {
150 | HdfsFileInfoPtr { ptr, len: 1 }
151 | }
152 |
153 | pub fn new_array(ptr: *const hdfsFileInfo, len: i32) -> HdfsFileInfoPtr {
154 | HdfsFileInfoPtr { ptr, len }
155 | }
156 | }
157 |
158 | /// Interface that represents the client side information for a file or directory.
159 | pub struct FileStatus<'fs> {
160 | raw: Arc<HdfsFileInfoPtr>,
161 | idx: u32,
162 | _marker: PhantomData<&'fs HdfsFs>,
163 | }
164 |
165 | impl<'fs> FileStatus<'fs> {
166 | #[inline]
167 | /// create FileStatus from *const hdfsFileInfo
168 | fn new(ptr: *const hdfsFileInfo) -> FileStatus<'fs> {
169 | FileStatus {
170 | raw: Arc::new(HdfsFileInfoPtr::new(ptr)),
171 | idx: 0,
172 | _marker: PhantomData,
173 | }
174 | }
175 |
176 | /// create FileStatus from *const hdfsFileInfo which points
177 | /// to dynamically allocated array.
178 | #[inline]
179 | fn from_array(raw: Arc<HdfsFileInfoPtr>, idx: u32) -> FileStatus<'fs> {
180 | FileStatus {
181 | raw,
182 | idx,
183 | _marker: PhantomData,
184 | }
185 | }
186 |
187 | #[inline]
188 | fn ptr(&self) -> *const hdfsFileInfo {
189 | unsafe { self.raw.ptr.offset(self.idx as isize) }
190 | }
191 |
192 | /// Get the name of the file
193 | #[inline]
194 | pub fn name(&self) -> &'fs str {
195 | from_raw!((*self.ptr()).mName)
196 | }
197 |
198 | /// Is this a file?
199 | #[inline]
200 | pub fn is_file(&self) -> bool {
201 | match unsafe { &*self.ptr() }.mKind {
202 | tObjectKind::kObjectKindFile => true,
203 | tObjectKind::kObjectKindDirectory => false,
204 | }
205 | }
206 |
207 | /// Is this a directory?
208 | #[inline]
209 | pub fn is_directory(&self) -> bool {
210 | match unsafe { &*self.ptr() }.mKind {
211 | tObjectKind::kObjectKindFile => false,
212 | tObjectKind::kObjectKindDirectory => true,
213 | }
214 | }
215 |
216 | /// Get the owner of the file
217 | #[inline]
218 | pub fn owner(&self) -> &'fs str {
219 | from_raw!((*self.ptr()).mOwner)
220 | }
221 |
222 | /// Get the group associated with the file
223 | #[inline]
224 | pub fn group(&self) -> &'fs str {
225 | from_raw!((*self.ptr()).mGroup)
226 | }
227 |
228 | /// Get the permissions associated with the file
229 | #[inline]
230 | pub fn permission(&self) -> i16 {
231 | unsafe { &*self.ptr() }.mPermissions as i16
232 | }
233 |
234 | /// Get the length of this file, in bytes.
235 | #[inline]
236 | #[allow(clippy::len_without_is_empty)]
237 | pub fn len(&self) -> usize {
238 | unsafe { &*self.ptr() }.mSize as usize
239 | }
240 |
241 | /// Get the block size of the file.
242 | #[inline]
243 | pub fn block_size(&self) -> usize {
244 | unsafe { &*self.ptr() }.mBlockSize as usize
245 | }
246 |
247 | /// Get the replication factor of a file.
248 | #[inline]
249 | pub fn replica_count(&self) -> i16 {
250 | unsafe { &*self.ptr() }.mReplication as i16
251 | }
252 |
253 | /// Get the last modification time for the file in seconds
254 | #[inline]
255 | pub fn last_modified(&self) -> time_t {
256 | unsafe { &*self.ptr() }.mLastMod
257 | }
258 |
259 | /// Get the last access time for the file in seconds
260 | #[inline]
261 | pub fn last_accced(&self) -> time_t {
262 | unsafe { &*self.ptr() }.mLastAccess
263 | }
264 | }
265 |
266 | /// Hdfs Filesystem
267 | ///
268 | /// It is basically thread safe because the native API for hdfsFs is thread-safe.
269 | #[derive(Clone)]
270 | pub struct HdfsFs {
271 | pub url: String,
272 | raw: *const hdfsFS,
273 | }
274 |
275 | unsafe impl Send for HdfsFs {}
276 |
277 | unsafe impl Sync for HdfsFs {}
278 |
279 | impl Debug for HdfsFs {
280 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
281 | f.debug_struct("Hdfs").field("url", &self.url).finish()
282 | }
283 | }
284 |
285 | impl HdfsFs {
286 | /// Create an HdfsFs instance. Prefer obtaining one through HdfsRegistry rather than calling this API directly.
287 | #[inline]
288 | pub(crate) fn new(url: String, raw: *const hdfsFS) -> HdfsFs {
289 | HdfsFs { url, raw }
290 | }
291 |
292 | /// Get HDFS namenode url
293 | #[inline]
294 | pub fn url(&self) -> &str {
295 | &self.url
296 | }
297 |
298 | /// Get a raw pointer of JNI API's HdfsFs
299 | #[inline]
300 | pub fn raw(&self) -> *const hdfsFS {
301 | self.raw
302 | }
303 |
304 | /// Open a file for append
305 | pub fn append(&self, path: &str) -> Result<HdfsFile<'_>, HdfsErr> {
306 | if !self.exist(path) {
307 | return Err(HdfsErr::FileNotFound(path.to_owned()));
308 | }
309 |
310 | let file = unsafe { hdfsOpenFile(self.raw, to_raw!(path), O_APPEND, 0, 0, 0) };
311 |
312 | if file.is_null() {
313 | Err(HdfsErr::Unknown)
314 | } else {
315 | Ok(HdfsFile {
316 | fs: self,
317 | path: path.to_owned(),
318 | file,
319 | })
320 | }
321 | }
322 |
323 | /// set permission
324 | pub fn chmod(&self, path: &str, mode: i16) -> bool {
325 | (unsafe { hdfsChmod(self.raw, to_raw!(path), mode as c_short) }) == 0
326 | }
327 |
328 | pub fn chown(&self, path: &str, owner: &str, group: &str) -> bool {
329 | (unsafe { hdfsChown(self.raw, to_raw!(path), to_raw!(owner), to_raw!(group)) }) == 0
330 | }
331 |
332 | #[inline]
333 | pub fn create(&self, path: &str) -> Result<HdfsFile<'_>, HdfsErr> {
334 | self.create_with_params(path, false, 0, 0, 0)
335 | }
336 |
337 | #[inline]
338 | pub fn create_with_overwrite(
339 | &self,
340 | path: &str,
341 | overwrite: bool,
342 | ) -> Result<HdfsFile<'_>, HdfsErr> {
343 | self.create_with_params(path, overwrite, 0, 0, 0)
344 | }
345 |
346 | pub fn create_with_params(
347 | &self,
348 | path: &str,
349 | overwrite: bool,
350 | buf_size: i32,
351 | replica_num: i16,
352 | block_size: i32,
353 | ) -> Result<HdfsFile<'_>, HdfsErr> {
354 | if !overwrite && self.exist(path) {
355 | return Err(HdfsErr::FileAlreadyExists(path.to_owned()));
356 | }
357 |
358 | let file = unsafe {
359 | hdfsOpenFile(
360 | self.raw,
361 | to_raw!(path),
362 | O_WRONLY,
363 | buf_size as c_int,
364 | replica_num as c_short,
365 | block_size as i32,
366 | )
367 | };
368 |
369 | if file.is_null() {
370 | Err(HdfsErr::Unknown)
371 | } else {
372 | Ok(HdfsFile {
373 | fs: self,
374 | path: path.to_owned(),
375 | file,
376 | })
377 | }
378 | }
379 |
380 | /// Get the default blocksize.
381 | pub fn default_blocksize(&self) -> Result<usize, HdfsErr> {
382 | let block_sz = unsafe { hdfsGetDefaultBlockSize(self.raw) };
383 |
384 | if block_sz > 0 {
385 | Ok(block_sz as usize)
386 | } else {
387 | Err(HdfsErr::Unknown)
388 | }
389 | }
390 |
391 | /// Get the default blocksize at the filesystem indicated by a given path.
392 | pub fn block_size(&self, path: &str) -> Result<usize, HdfsErr> {
393 | let block_sz = unsafe { hdfsGetDefaultBlockSizeAtPath(self.raw, to_raw!(path)) };
394 |
395 | if block_sz > 0 {
396 | Ok(block_sz as usize)
397 | } else {
398 | Err(HdfsErr::Unknown)
399 | }
400 | }
401 |
402 | /// Return the raw capacity of the filesystem.
403 | pub fn capacity(&self) -> Result<usize, HdfsErr> {
404 | let block_sz = unsafe { hdfsGetCapacity(self.raw) };
405 |
406 | if block_sz > 0 {
407 | Ok(block_sz as usize)
408 | } else {
409 | Err(HdfsErr::Unknown)
410 | }
411 | }
412 |
413 | /// Delete file.
414 | pub fn delete(&self, path: &str, recursive: bool) -> Result<bool, HdfsErr> {
415 | let res = unsafe { hdfsDelete(self.raw, to_raw!(path), recursive as c_int) };
416 |
417 | if res == 0 {
418 | Ok(true)
419 | } else {
420 | Err(HdfsErr::Unknown)
421 | }
422 | }
423 |
424 | /// Checks if a given path exists on the filesystem
425 | pub fn exist(&self, path: &str) -> bool {
426 | unsafe { hdfsExists(self.raw, to_raw!(path)) == 0 }
427 | }
428 |
429 | /// Get hostnames where a particular block (determined by
430 | /// pos & blocksize) of a file is stored. The last element in the array
431 | /// is NULL. Due to replication, a single block could be present on
432 | /// multiple hosts.
433 | pub fn get_hosts(
434 | &self,
435 | path: &str,
436 | start: usize,
437 | length: usize,
438 | ) -> Result<BlockHosts, HdfsErr> {
439 | let ptr = unsafe { hdfsGetHosts(self.raw, to_raw!(path), start as i64, length as i64) };
440 |
441 | if !ptr.is_null() {
442 | Ok(BlockHosts { ptr })
443 | } else {
444 | Err(HdfsErr::Unknown)
445 | }
446 | }
447 |
448 | /// create a directory
449 | pub fn mkdir(&self, path: &str) -> Result<bool, HdfsErr> {
450 | if unsafe { hdfsCreateDirectory(self.raw, to_raw!(path)) } == 0 {
451 | Ok(true)
452 | } else {
453 | Err(HdfsErr::Unknown)
454 | }
455 | }
456 |
457 | /// open a file to read
458 | #[inline]
459 | pub fn open(&self, path: &str) -> Result<HdfsFile<'_>, HdfsErr> {
460 | self.open_with_bufsize(path, 0)
461 | }
462 |
463 | /// open a file to read with a buffer size
464 | pub fn open_with_bufsize(&self, path: &str, buf_size: i32) -> Result<HdfsFile<'_>, HdfsErr> {
465 | let file =
466 | unsafe { hdfsOpenFile(self.raw, to_raw!(path), O_RDONLY, buf_size as c_int, 0, 0) };
467 |
468 | if file.is_null() {
469 | Err(HdfsErr::Unknown)
470 | } else {
471 | Ok(HdfsFile {
472 | fs: self,
473 | path: path.to_owned(),
474 | file,
475 | })
476 | }
477 | }
478 |
479 | /// Set the replication of the specified file to the supplied value
480 | pub fn set_replication(&self, path: &str, num: i16) -> Result<bool, HdfsErr> {
481 | let res = unsafe { hdfsSetReplication(self.raw, to_raw!(path), num as i16) };
482 |
483 | if res == 0 {
484 | Ok(true)
485 | } else {
486 | Err(HdfsErr::Unknown)
487 | }
488 | }
489 |
490 | /// Rename file.
491 | pub fn rename(&self, old_path: &str, new_path: &str) -> Result<bool, HdfsErr> {
492 | let res = unsafe { hdfsRename(self.raw, to_raw!(old_path), to_raw!(new_path)) };
493 |
494 | if res == 0 {
495 | Ok(true)
496 | } else {
497 | Err(HdfsErr::Unknown)
498 | }
499 | }
500 |
501 | /// Return the total raw size of all files in the filesystem.
502 | pub fn used(&self) -> Result<usize, HdfsErr> {
503 | let block_sz = unsafe { hdfsGetUsed(self.raw) };
504 |
505 | if block_sz > 0 {
506 | Ok(block_sz as usize)
507 | } else {
508 | Err(HdfsErr::Unknown)
509 | }
510 | }
511 |
512 | pub fn list_status(&self, path: &str) -> Result<Vec<FileStatus>, HdfsErr> {
513 | let mut entry_num: c_int = 0;
514 |
515 | let ptr = unsafe { hdfsListDirectory(self.raw, to_raw!(path), &mut entry_num) };
516 |
517 | if ptr.is_null() {
518 | return Err(HdfsErr::Unknown);
519 | }
520 |
521 | let shared_ptr = Arc::new(HdfsFileInfoPtr::new_array(ptr, entry_num));
522 |
523 | let mut list = Vec::new();
524 | for idx in 0..entry_num {
525 | list.push(FileStatus::from_array(shared_ptr.clone(), idx as u32));
526 | }
527 |
528 | Ok(list)
529 | }
530 |
531 | pub fn get_file_status(&self, path: &str) -> Result<FileStatus, HdfsErr> {
532 | let ptr = unsafe { hdfsGetPathInfo(self.raw, to_raw!(path)) };
533 |
534 | if ptr.is_null() {
535 | Err(HdfsErr::Unknown)
536 | } else {
537 | Ok(FileStatus::new(ptr))
538 | }
539 | }
540 | }
541 |
542 | /// open hdfs file
543 | pub struct HdfsFile<'a> {
544 | fs: &'a HdfsFs,
545 | path: String,
546 | file: *const hdfsFile,
547 | }
548 |
549 | #[derive(Clone)]
550 | pub struct RawHdfsFileWrapper {
551 | pub path: String,
552 | pub file: *const hdfsFile,
553 | }
554 |
555 | impl<'a> From<&HdfsFile<'a>> for RawHdfsFileWrapper {
556 | fn from(file: &HdfsFile<'a>) -> Self {
557 | RawHdfsFileWrapper {
558 | path: file.path.clone(),
559 | file: file.file,
560 | }
561 | }
562 | }
563 |
564 | unsafe impl Send for RawHdfsFileWrapper {}
565 |
566 | unsafe impl Sync for RawHdfsFileWrapper {}
567 |
568 | impl<'a> HdfsFile<'a> {
569 | pub fn from_raw(rw: &RawHdfsFileWrapper, fs: &'a HdfsFs) -> HdfsFile<'a> {
570 | let path = rw.path.clone();
571 | HdfsFile {
572 | fs,
573 | path,
574 | file: rw.file,
575 | }
576 | }
577 |
578 | pub fn available(&self) -> Result<bool, HdfsErr> {
579 | if unsafe { hdfsAvailable(self.fs.raw, self.file) } == 0 {
580 | Ok(true)
581 | } else {
582 | Err(HdfsErr::Unknown)
583 | }
584 | }
585 |
586 | /// Close the opened file
587 | pub fn close(&self) -> Result<bool, HdfsErr> {
588 | if unsafe { hdfsCloseFile(self.fs.raw, self.file) } == 0 {
589 | Ok(true)
590 | } else {
591 | Err(HdfsErr::Unknown)
592 | }
593 | }
594 |
595 | /// Flush the data.
596 | pub fn flush(&self) -> bool {
597 | (unsafe { hdfsFlush(self.fs.raw, self.file) }) == 0
598 | }
599 |
600 | /// Flush out the data in client's user buffer. After the return of this
601 | /// call, new readers will see the data.
602 | pub fn hflush(&self) -> bool {
603 | (unsafe { hdfsHFlush(self.fs.raw, self.file) }) == 0
604 | }
605 |
606 | /// Similar to POSIX fsync: flush out the data in the client's
607 | /// user buffer all the way to the disk device (though the disk may
608 | /// still have it in its cache).
609 | pub fn hsync(&self) -> bool {
610 | (unsafe { hdfsHSync(self.fs.raw, self.file) }) == 0
611 | }
612 |
613 | /// Determine if a file is open for read.
614 | pub fn is_readable(&self) -> bool {
615 | (unsafe { hdfsFileIsOpenForRead(self.file) }) == 1
616 | }
617 |
618 | /// Determine if a file is open for write.
619 | pub fn is_writable(&self) -> bool {
620 | (unsafe { hdfsFileIsOpenForWrite(self.file) }) == 1
621 | }
622 |
623 | /// Return a file path
624 | pub fn path(&'a self) -> &'a str {
625 | &self.path
626 | }
627 |
628 | /// Get the current offset in the file, in bytes.
629 | pub fn pos(&self) -> Result<u64, HdfsErr> {
630 | let pos = unsafe { hdfsTell(self.fs.raw, self.file) };
631 |
632 | if pos > 0 {
633 | Ok(pos as u64)
634 | } else {
635 | Err(HdfsErr::Unknown)
636 | }
637 | }
638 |
639 | /// Read data from an open file.
640 | pub fn read(&self, buf: &mut [u8]) -> Result<i32, HdfsErr> {
641 | let read_len = unsafe {
642 | hdfsRead(
643 | self.fs.raw,
644 | self.file,
645 | buf.as_ptr() as *mut c_void,
646 | buf.len() as tSize,
647 | )
648 | };
649 |
650 | if read_len > 0 {
651 | Ok(read_len as i32)
652 | } else {
653 | Err(HdfsErr::Unknown)
654 | }
655 | }
656 |
657 | /// Positional read of data from an open file.
658 | pub fn read_with_pos(&self, pos: i64, buf: &mut [u8]) -> Result<i32, HdfsErr> {
659 | let read_len = unsafe {
660 | hdfsPread(
661 | self.fs.raw,
662 | self.file,
663 | pos as tOffset,
664 | buf.as_ptr() as *mut c_void,
665 | buf.len() as tSize,
666 | )
667 | };
668 |
669 | if read_len > 0 {
670 | Ok(read_len as i32)
671 | } else {
672 | Err(HdfsErr::Unknown)
673 | }
674 | }
675 |
676 | /// Read data from an open file.
677 | pub fn read_length(&self, buf: &mut [u8], length: usize) -> Result<i32, HdfsErr> {
678 | let required_len = min(length, buf.len());
679 | let read_len = unsafe {
680 | hdfsRead(
681 | self.fs.raw,
682 | self.file,
683 | buf.as_ptr() as *mut c_void,
684 | required_len as tSize,
685 | )
686 | };
687 |
688 | if read_len > 0 {
689 | Ok(read_len as i32)
690 | } else {
691 | Err(HdfsErr::Unknown)
692 | }
693 | }
694 |
695 | /// Positional read of data from an open file.
696 | pub fn read_with_pos_length(
697 | &self,
698 | pos: i64,
699 | buf: &mut [u8],
700 | length: usize,
701 | ) -> Result<i32, HdfsErr> {
702 | let required_len = min(length, buf.len());
703 | let read_len = unsafe {
704 | hdfsPread(
705 | self.fs.raw,
706 | self.file,
707 | pos as tOffset,
708 | buf.as_ptr() as *mut c_void,
709 | required_len as tSize,
710 | )
711 | };
712 |
713 | if read_len > 0 {
714 | Ok(read_len as i32)
715 | } else {
716 | Err(HdfsErr::Unknown)
717 | }
718 | }
719 |
720 | /// Perform a byte buffer read. If possible, this will be a zero-copy
721 | /// (mmap) read.
722 | pub fn read_zc(&'a self, opts: &RzOptions, max_len: i32) -> Result<RzBuffer<'a>, HdfsErr> {
723 | let buf: *const hadoopRzBuffer =
724 | unsafe { hadoopReadZero(self.file, opts.ptr, max_len as i32) };
725 |
726 | if !buf.is_null() {
727 | Ok(RzBuffer {
728 | file: self,
729 | ptr: buf,
730 | })
731 | } else {
732 | Err(HdfsErr::Unknown)
733 | }
734 | }
735 |
736 | /// Seek to given offset in file.
737 | pub fn seek(&self, offset: u64) -> bool {
738 | (unsafe { hdfsSeek(self.fs.raw, self.file, offset as tOffset) }) == 0
739 | }
740 |
741 | /// Write data into an open file.
742 | pub fn write(&self, buf: &[u8]) -> Result<i32, HdfsErr> {
743 | let written_len = unsafe {
744 | hdfsWrite(
745 | self.fs.raw,
746 | self.file,
747 | buf.as_ptr() as *mut c_void,
748 | buf.len() as tSize,
749 | )
750 | };
751 |
752 | if written_len > 0 {
753 | Ok(written_len)
754 | } else {
755 | Err(HdfsErr::Unknown)
756 | }
757 | }
758 | }
759 |
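
A short editorial sketch (not part of `dfs.rs`) of how the `HdfsFs`/`HdfsFile` methods above compose into a create/write/read round trip; the path is a placeholder and errors simply propagate:

```rust
use hdfs_native::{HdfsErr, HdfsFs};

// Create a file, write to it, read it back, then delete it.
fn roundtrip(fs: &HdfsFs) -> Result<(), HdfsErr> {
    let path = "/tmp/hdfs_native_roundtrip.txt"; // placeholder path

    let out = fs.create_with_overwrite(path, true)?;
    out.write(b"hello hdfs")?;
    out.close()?;

    let input = fs.open(path)?;
    let mut buf = vec![0u8; 16];
    let n = input.read(&mut buf)?;
    assert_eq!(&buf[..n as usize], b"hello hdfs");
    input.close()?;

    fs.delete(path, false)?;
    Ok(())
}
```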
--------------------------------------------------------------------------------
/src/err.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use datafusion::error::DataFusionError;
19 | use std::io::ErrorKind;
20 | use thiserror::Error;
21 |
22 | /// Errors which can occur during accessing Hdfs cluster
23 | #[derive(Error, Debug)]
24 | pub enum HdfsErr {
25 | #[error("Unknown hdfs error")]
26 | Unknown,
27 | /// file path
28 | #[error("File not found `{0}`")]
29 | FileNotFound(String),
30 | /// file path
31 | #[error("File already exists `{0}`")]
32 | FileAlreadyExists(String),
33 | /// namenode address
34 | #[error("Cannot connect to NameNode `{0}`")]
35 | CannotConnectToNameNode(String),
36 | /// URL
37 | #[error("Invalid URL `{0}`")]
38 | InvalidUrl(String),
39 | }
40 |
41 | fn get_error_kind(e: &HdfsErr) -> ErrorKind {
42 | match e {
43 | HdfsErr::Unknown => ErrorKind::Other,
44 | HdfsErr::FileNotFound(_) => ErrorKind::NotFound,
45 | HdfsErr::FileAlreadyExists(_) => ErrorKind::AlreadyExists,
46 | HdfsErr::CannotConnectToNameNode(_) => ErrorKind::ConnectionRefused,
47 | HdfsErr::InvalidUrl(_) => ErrorKind::AddrNotAvailable,
48 | }
49 | }
50 |
51 | impl From<HdfsErr> for DataFusionError {
52 | fn from(e: HdfsErr) -> DataFusionError {
53 | let transformed_kind = get_error_kind(&e);
54 | DataFusionError::IoError(std::io::Error::new(transformed_kind, e))
55 | }
56 | }
57 |
58 | impl From<HdfsErr> for std::io::Error {
59 | fn from(e: HdfsErr) -> std::io::Error {
60 | let transformed_kind = get_error_kind(&e);
61 | std::io::Error::new(transformed_kind, e)
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/hdfs_store.rs:
--------------------------------------------------------------------------------
1 | use async_trait::async_trait;
2 | use futures::{AsyncRead, Future};
3 | use std::sync::{Arc, Mutex};
4 |
5 | use crate::{FileStatus, HdfsErr, HdfsFile, HdfsFs, HdfsRegistry, RawHdfsFileWrapper};
6 | use chrono::{Local, TimeZone, Utc};
7 | use datafusion::datasource::object_store::{
8 | FileMeta, FileMetaStream, ListEntry, ListEntryStream, ObjectReader, ObjectStore,
9 | };
10 | use datafusion::error::{DataFusionError, Result};
11 | use std::collections::HashMap;
12 | use std::convert::TryFrom;
13 | use std::fmt::Debug;
14 | use std::io::{ErrorKind, Read};
15 | use std::pin::Pin;
16 | use std::task::{Context, Poll};
17 | use tokio::{
18 | sync::mpsc::{channel, Receiver, Sender},
19 | task,
20 | };
21 | use tokio_stream::wrappers::ReceiverStream;
22 | use tokio_stream::StreamExt;
23 |
24 | #[derive(Debug)]
25 | pub struct HdfsStore {
26 | fs_registry: HdfsRegistry,
27 | }
28 |
29 | impl HdfsStore {
30 | #[allow(dead_code)]
31 | // We will eventually move HdfsStore into its own crate once hdfs-native is mature,
32 | // so ignore the dead-code warning here.
33 | pub fn new() -> Result<Self> {
34 | Ok(HdfsStore {
35 | fs_registry: HdfsRegistry::new(),
36 | })
37 | }
38 |
39 | pub fn new_from(fs: Arc<Mutex<HashMap<String, HdfsFs>>>) -> Self {
40 | HdfsStore {
41 | fs_registry: HdfsRegistry::new_from(fs),
42 | }
43 | }
44 |
45 | pub fn get_fs(&self, prefix: &str) -> std::result::Result<HdfsFs, HdfsErr> {
46 | self.fs_registry.get(prefix)
47 | }
48 |
49 | fn all_fs(&self) -> Arc<Mutex<HashMap<String, HdfsFs>>> {
50 | self.fs_registry.all_fs.clone()
51 | }
52 | }
53 |
54 | fn list_dir_sync(
55 | all_fs: Arc<Mutex<HashMap<String, HdfsFs>>>,
56 | prefix: &str,
57 | response_tx: Sender<Result<ListEntry>>,
58 | ) -> Result<()> {
59 | let store = HdfsStore::new_from(all_fs);
60 | let fs = store.get_fs(prefix)?;
61 | let all_status = fs.list_status(prefix)?;
62 | for status in &all_status {
63 | response_tx
64 | .blocking_send(Ok(ListEntry::from(status)))
65 | .map_err(|e| DataFusionError::Execution(e.to_string()))?;
66 | }
67 | Ok(())
68 | }
69 |
70 | impl<'a> TryFrom<&FileStatus<'a>> for FileMeta {
71 | type Error = DataFusionError;
72 |
73 | fn try_from(status: &FileStatus) -> Result<Self> {
74 | let rs: ListEntry = status.into();
75 | match rs {
76 | ListEntry::FileMeta(f) => Ok(f),
77 | ListEntry::Prefix(path) => {
78 | Err(std::io::Error::new(ErrorKind::Other, format!("{} is not a file", path)).into())
79 | }
80 | }
81 | }
82 | }
83 |
84 | impl<'a> From<&FileStatus<'a>> for ListEntry {
85 | fn from(status: &FileStatus) -> Self {
86 | if status.is_directory() {
87 | ListEntry::Prefix(status.name().to_owned())
88 | } else {
89 | let time = Local
90 | .timestamp(status.last_modified(), 0)
91 | .with_timezone(&Utc);
92 | ListEntry::FileMeta(FileMeta {
93 | path: status.name().to_owned(),
94 | last_modified: Some(time),
95 | size: status.len() as u64,
96 | })
97 | }
98 | }
99 | }
100 |
101 | #[async_trait]
102 | impl ObjectStore for HdfsStore {
103 | async fn list_file(&self, prefix: &str) -> Result<FileMetaStream> {
104 | let entry_stream = self.list_dir(prefix, None).await?;
105 | let result = entry_stream.map(|r| match r {
106 | Ok(entry) => match entry {
107 | ListEntry::FileMeta(fm) => Ok(fm),
108 | ListEntry::Prefix(path) => Err(DataFusionError::from(std::io::Error::new(
109 | ErrorKind::InvalidInput,
110 | format!("{} is not a file", path),
111 | ))),
112 | },
113 | Err(e) => Err(e),
114 | });
115 |
116 | Ok(Box::pin(result))
117 | }
118 |
119 | async fn list_dir(&self, prefix: &str, _delimiter: Option<String>) -> Result<ListEntryStream> {
120 | let (response_tx, response_rx): (Sender<Result<ListEntry>>, Receiver<Result<ListEntry>>) =
121 | channel(2);
122 | let prefix = prefix.to_owned();
123 | let all_fs = self.all_fs();
124 | task::spawn_blocking(move || {
125 | if let Err(e) = list_dir_sync(all_fs, &prefix, response_tx) {
126 | println!("List status thread terminated due to error {:?}", e)
127 | }
128 | });
129 | Ok(Box::pin(ReceiverStream::new(response_rx)))
130 | }
131 |
132 | fn file_reader(&self, file: FileMeta) -> Result<Arc<dyn ObjectReader>> {
133 | let fs = self.all_fs();
134 | let reader = HdfsFileReader::new(HdfsStore::new_from(fs), file);
135 | Ok(Arc::new(reader))
136 | }
137 | }
138 |
139 | pub struct HdfsFileReader {
140 | store: HdfsStore,
141 | file: FileMeta,
142 | }
143 |
144 | struct HdfsAsyncRead {
145 | store: HdfsStore,
146 | file: RawHdfsFileWrapper,
147 | start: u64,
148 | length: usize,
149 | }
150 |
151 | impl AsyncRead for HdfsAsyncRead {
152 | fn poll_read(
153 | self: Pin<&mut Self>,
154 | cx: &mut Context<'_>,
155 | buf: &mut [u8],
156 | ) -> Poll<std::io::Result<usize>> {
157 | let path = self.file.path.clone();
158 | let all_fs = self.store.all_fs();
159 | let file_wrapper = self.file.clone();
160 | let start = self.start as i64;
161 | let length = self.length;
162 | let buf_len = buf.len();
163 |
164 | let mut read_sync = task::spawn_blocking(move || {
165 | let store = HdfsStore::new_from(all_fs);
166 | let fs = store.get_fs(&*path);
167 | let mut vec = vec![0u8; buf_len];
168 | match fs {
169 | Ok(fs) => {
170 | let file = HdfsFile::from_raw(&file_wrapper, &fs);
171 | file.read_with_pos_length(start as i64, &mut *vec, length)
172 | .map_err(std::io::Error::from)
173 | .map(|s| (vec, s as usize))
174 | }
175 | Err(e) => Err(std::io::Error::from(e)),
176 | }
177 | });
178 |
179 | match Pin::new(&mut read_sync).poll(cx) {
180 | Poll::Ready(r) => match r {
181 | Ok(vl_r) => match vl_r {
182 | Ok(vl) => match vl.0.as_slice().read(buf) {
183 | Ok(_) => Poll::Ready(Ok(vl.1)),
184 | Err(e) => Poll::Ready(Err(e)),
185 | },
186 | Err(e) => Poll::Ready(Err(e)),
187 | },
188 | Err(e) => Poll::Ready(Err(std::io::Error::from(e))),
189 | },
190 | Poll::Pending => Poll::Pending,
191 | }
192 | }
193 | }
194 |
195 | impl HdfsFileReader {
196 | pub fn new(store: HdfsStore, file: FileMeta) -> Self {
197 | Self { store, file }
198 | }
199 | }
200 |
201 | #[async_trait]
202 | impl ObjectReader for HdfsFileReader {
203 | async fn chunk_reader(&self, start: u64, length: usize) -> Result<Arc<dyn AsyncRead>> {
204 | let file = self.file.path.clone();
205 | let fs = self.store.all_fs();
206 | let x = task::spawn_blocking(move || {
207 | let store = HdfsStore::new_from(fs);
208 | let fs_result = store.get_fs(&*file).map_err(DataFusionError::from);
209 | match fs_result {
210 | Ok(fs) => {
211 | let file_result = fs.open(&*file).map_err(DataFusionError::from);
212 | match file_result {
213 | Ok(file) => {
214 | let x = (&file).into();
215 | Ok(HdfsAsyncRead {
216 | store: HdfsStore::new_from(store.all_fs()),
217 | file: x,
218 | start,
219 | length,
220 | })
221 | }
222 | Err(e) => Err(e),
223 | }
224 | }
225 | Err(e) => Err(e),
226 | }
227 | })
228 | .await;
229 | match x {
230 | Ok(r) => Ok(Arc::new(r?)),
231 | Err(e) => Err(DataFusionError::Execution(format!(
232 | "Open hdfs file thread terminated due to error: {:?}",
233 | e
234 | ))),
235 | }
236 | }
237 |
238 | fn length(&self) -> u64 {
239 | self.file.size
240 | }
241 | }
242 |
243 | #[cfg(test)]
244 | mod tests {
245 | use crate::hdfs_store::HdfsStore;
246 |
247 | #[test]
248 | fn it_works() {
249 | let _hdfs_store = HdfsStore::new();
250 | }
251 | }
252 |
--------------------------------------------------------------------------------
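
As a quick orientation to the `ObjectStore` implementation above, the sketch below drives `list_file` from inside the crate and prints each entry. It is illustrative only: the `print_listing` helper and the `datafusion::datasource::object_store::ObjectStore` import path are assumptions, while `HdfsStore::new()` and the `path`/`size` fields on `FileMeta` follow the usage shown in hdfs_store.rs.

```rust
// Hypothetical in-crate usage sketch for the ObjectStore impl in hdfs_store.rs.
// Assumptions: the trait lives at datafusion::datasource::object_store::ObjectStore,
// and HdfsStore::new() builds a ready store as in the unit test above.
use datafusion::datasource::object_store::ObjectStore;
use futures::StreamExt;

use crate::hdfs_store::HdfsStore;

async fn print_listing(prefix: &str) -> datafusion::error::Result<()> {
    let store = HdfsStore::new();
    // list_file returns a stream of Result<FileMeta>; consume it item by item.
    let mut files = store.list_file(prefix).await?;
    while let Some(meta) = files.next().await {
        let meta = meta?;
        println!("{} ({} bytes)", meta.path, meta.size);
    }
    Ok(())
}
```
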
/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! A rust wrapper over libhdfs3
19 |
20 | /// Rust APIs wrapping libhdfs3 API, providing better semantic and abstraction
21 | pub mod dfs;
22 | pub mod err;
23 | mod hdfs_store;
24 | /// libhdfs3 raw binding APIs
25 | pub mod raw;
26 | pub mod util;
27 |
28 | pub use crate::dfs::*;
29 | pub use crate::err::HdfsErr;
30 | pub use crate::util::HdfsUtil;
31 |
32 | use crate::raw::{
33 | hdfsBuilderConnect, hdfsBuilderSetNameNode, hdfsBuilderSetNameNodePort, hdfsFS, hdfsNewBuilder,
34 | };
35 | use log::info;
36 | use std::collections::HashMap;
37 | use std::sync::{Arc, Mutex};
38 | use url::Url;
39 |
40 | static LOCAL_FS_SCHEME: &str = "file";
41 |
42 | /// HdfsRegistry which stores seen HdfsFs instances.
43 | #[derive(Debug)]
44 | pub struct HdfsRegistry {
45 | all_fs: Arc<Mutex<HashMap<String, HdfsFs>>>,
46 | }
47 |
48 | impl Default for HdfsRegistry {
49 | fn default() -> Self {
50 | HdfsRegistry::new()
51 | }
52 | }
53 |
54 | struct HostPort {
55 | host: String,
56 | port: u16,
57 | }
58 |
59 | enum NNScheme {
60 | Local,
61 | Remote(HostPort),
62 | }
63 |
64 | impl ToString for NNScheme {
65 | fn to_string(&self) -> String {
66 | match self {
67 | NNScheme::Local => "file:///".to_string(),
68 | NNScheme::Remote(hp) => format!("{}:{}", hp.host, hp.port),
69 | }
70 | }
71 | }
72 |
73 | impl HdfsRegistry {
74 | pub fn new() -> HdfsRegistry {
75 | HdfsRegistry {
76 | all_fs: Arc::new(Mutex::new(HashMap::new())),
77 | }
78 | }
79 |
80 | pub fn new_from(fs: Arc<Mutex<HashMap<String, HdfsFs>>>) -> HdfsRegistry {
81 | HdfsRegistry { all_fs: fs }
82 | }
83 |
84 | fn get_namenode(&self, path: &str) -> Result<NNScheme, HdfsErr> {
85 | match Url::parse(path) {
86 | Ok(url) => {
87 | if url.scheme() == LOCAL_FS_SCHEME {
88 | Ok(NNScheme::Local)
89 | } else if url.host().is_some() && url.port().is_some() {
90 | Ok(NNScheme::Remote(HostPort {
91 | host: format!("{}://{}", &url.scheme(), url.host().unwrap()),
92 | port: url.port().unwrap(),
93 | }))
94 | } else {
95 | Err(HdfsErr::InvalidUrl(path.to_string()))
96 | }
97 | }
98 | Err(_) => Err(HdfsErr::InvalidUrl(path.to_string())),
99 | }
100 | }
101 |
102 | pub fn get(&self, path: &str) -> Result<HdfsFs, HdfsErr> {
103 | let host_port = self.get_namenode(path)?;
104 |
105 | let mut map = self.all_fs.lock().unwrap();
106 |
107 | let entry: &mut HdfsFs = map.entry(host_port.to_string()).or_insert({
108 | let hdfs_fs: *const hdfsFS = unsafe {
109 | let hdfs_builder = hdfsNewBuilder();
110 | match host_port {
111 | NNScheme::Local => {} //NO-OP
112 | NNScheme::Remote(ref hp) => {
113 | hdfsBuilderSetNameNode(hdfs_builder, to_raw!(&*hp.host));
114 | hdfsBuilderSetNameNodePort(hdfs_builder, hp.port);
115 | }
116 | }
117 | info!("Connecting to NameNode ({})", &host_port.to_string());
118 | hdfsBuilderConnect(hdfs_builder)
119 | };
120 |
121 | if hdfs_fs.is_null() {
122 | return Err(HdfsErr::CannotConnectToNameNode(host_port.to_string()));
123 | }
124 | info!("Connected to NameNode ({})", &host_port.to_string());
125 | HdfsFs::new(host_port.to_string(), hdfs_fs)
126 | });
127 |
128 | Ok(entry.clone())
129 | }
130 | }
131 |
132 | #[cfg(test)]
133 | mod test {
134 | use super::HdfsRegistry;
135 | use crate::HdfsErr;
136 | use log::debug;
137 |
138 | #[test]
139 | fn test_hdfs_connection() -> Result<(), HdfsErr> {
140 | let port = 9000;
141 |
142 | let dfs_addr = format!("hdfs://localhost:{}", port);
143 | let fs_registry = HdfsRegistry::new();
144 |
145 | let test_path = format!("hdfs://localhost:{}/users/test", port);
146 | debug!("Trying to get {}", &test_path);
147 |
148 | assert_eq!(dfs_addr, fs_registry.get(&test_path)?.url);
149 |
150 | // create a file, check existence, and close
151 | let fs = fs_registry.get(&test_path)?;
152 | let test_file = "/test_file";
153 | if fs.exist(test_file) {
154 | fs.delete(test_file, true)?;
155 | }
156 | let created_file = match fs.create(test_file) {
157 | Ok(f) => f,
158 | Err(e) => panic!("Couldn't create a file {:?}", e),
159 | };
160 | assert!(created_file.close().is_ok());
161 | assert!(fs.exist(test_file));
162 |
163 | // open a file and close
164 | let opened_file = fs.open(test_file)?;
165 | assert!(opened_file.close().is_ok());
166 |
167 | match fs.mkdir("/dir1") {
168 | Ok(_) => debug!("/dir1 created"),
169 | Err(_) => panic!("Couldn't create /dir1 directory"),
170 | };
171 |
172 | let file_info = fs.get_file_status("/dir1")?;
173 |
174 | assert_eq!("/dir1", file_info.name());
175 | assert!(!file_info.is_file());
176 | assert!(file_info.is_directory());
177 |
178 | let sub_dir_num = 3;
179 | let mut expected_list = Vec::new();
180 | for x in 0..sub_dir_num {
181 | let filename = format!("/dir1/{}", x);
182 | expected_list.push(format!("/dir1/{}", x));
183 |
184 | match fs.mkdir(&filename) {
185 | Ok(_) => debug!("{} created", filename),
186 | Err(_) => panic!("Couldn't create {} directory", filename),
187 | };
188 | }
189 |
190 | let mut list = fs.list_status("/dir1")?;
191 | assert_eq!(sub_dir_num, list.len());
192 |
193 | list.sort_by(|a, b| Ord::cmp(a.name(), b.name()));
194 |
195 | for (expected, name) in expected_list
196 | .iter()
197 | .zip(list.iter().map(|status| status.name()))
198 | {
199 | assert_eq!(expected, name);
200 | }
201 | Ok(())
202 | }
203 | }
204 |
--------------------------------------------------------------------------------
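
The registry keys connections by NameNode address, so repeated `get` calls for paths on the same host and port reuse one `HdfsFs`. The short sketch below illustrates that behavior; the NameNode address is a placeholder and `shared_connection` is an invented name, while `HdfsRegistry`, `get`, and the public `url` field all appear in lib.rs.

```rust
// Sketch: one cached HdfsFs per NameNode. Assumes a NameNode is reachable at
// hdfs://localhost:9000; the function name is illustrative.
use hdfs_native::{HdfsErr, HdfsRegistry};

fn shared_connection() -> Result<(), HdfsErr> {
    let registry = HdfsRegistry::new();
    let a = registry.get("hdfs://localhost:9000/data/a.parquet")?;
    let b = registry.get("hdfs://localhost:9000/logs/2021-07-01")?;
    // Both paths resolve to the same NameNode, so they share one cached entry.
    assert_eq!(a.url, b.url);
    Ok(())
}
```
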
/src/raw.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! libhdfs FFI Binding APIs
19 | #![allow(non_camel_case_types)]
20 | #![allow(non_snake_case)]
21 |
22 | use libc::{c_char, c_int, c_short, c_void, size_t, time_t};
23 |
24 | /// Opaque Pointer of hdfsFS
25 | pub enum hdfsFS {}
26 |
27 | /// Opaque Pointer of hdfsFile
28 | pub enum hdfsFile {}
29 |
30 | /// Opaque Pointer of hdfsBuilder
31 | pub enum hdfsBuilder {}
32 |
33 | /// Opaque Pointer of hadoopRzOptions
34 | pub enum hadoopRzOptions {}
35 |
36 | /// Opaque Pointer of hadoopRzBuffer
37 | pub enum hadoopRzBuffer {}
38 |
39 | /// size of data for read/write io ops
40 | pub type tSize = i32;
41 | /// time type in seconds
42 | pub type tTime = time_t;
43 | /// offset within the file
44 | pub type tOffset = i64;
45 | /// port
46 | pub type tPort = u16;
47 |
48 | #[repr(C)]
49 | pub enum tObjectKind {
50 | kObjectKindFile = 0x46, // 'F'
51 | kObjectKindDirectory = 0x44, // 'D'
52 | }
53 |
54 | /// Statistics about reads from an HDFS file.
55 | #[repr(C)]
56 | pub struct hdfsReadStatistics {
57 | pub totalBytesRead: u64,
58 | pub totalLocalBytesRead: u64,
59 | pub totalShortCircuitBytesRead: u64,
60 | pub totalZeroCopyBytesRead: u64,
61 | }
62 |
63 | #[repr(C)]
64 | pub struct hdfsFileInfo {
65 | /// file or directory
66 | pub mKind: tObjectKind,
67 | /// the name of the file
68 | pub mName: *const c_char,
69 | /// the last modification time for the file in seconds
70 | pub mLastMod: tTime,
71 | /// the size of the file in bytes
72 | pub mSize: tOffset,
73 | /// the count of replicas
74 | pub mReplication: c_short,
75 | /// the block size for the file
76 | pub mBlockSize: tOffset,
77 | /// the owner of the file
78 | pub mOwner: *const c_char,
79 | /// the group associated with the file
80 | pub mGroup: *const c_char,
81 | /// the permissions associated with the file
82 | pub mPermissions: c_short,
83 | /// the last access time for the file in seconds
84 | pub mLastAccess: tTime,
85 | }
86 |
87 | #[link(name = "hdfs3", kind = "dylib")]
88 | extern "C" {
89 |
90 | /// Determine if a file is open for read.
91 | ///
92 | /// #### Params
93 | /// * ```file``` - the HDFS file
94 | ///
95 | /// #### Return
96 | /// Return 1 if the file is open for read; 0 otherwise
97 | pub fn hdfsFileIsOpenForRead(fs: *const hdfsFile) -> c_int;
98 |
99 | /// Determine if a file is open for write.
100 | ///
101 | /// #### Params
102 | /// * ```file``` - the HDFS file
103 | ///
104 | /// #### Return
105 | /// Return 1 if the file is open for write; 0 otherwise.
106 | pub fn hdfsFileIsOpenForWrite(file: *const hdfsFile) -> c_int;
107 |
108 | /// Get read statistics about a file. This is only applicable to files
109 | /// opened for reading.
110 | ///
111 | /// #### Params
112 | /// * ```file``` - The HDFS file
113 | /// * ```stats``` - (out parameter) on a successful return, the read statistics.
114 | /// Unchanged otherwise. You must free the returned statistics with
115 | /// hdfsFileFreeReadStatistics.
116 | ///
117 | /// #### Return
118 | /// * 0 if the statistics were successfully returned,
119 | /// * -1 otherwise. On a failure, please check errno against
120 | /// * ENOTSUP. webhdfs, LocalFilesystem, and so forth may
121 | /// not support read statistics.
122 | pub fn hdfsFileGetReadStatistics(
123 | file: *const hdfsFile,
124 | stats: &mut *mut hdfsReadStatistics,
125 | ) -> c_int;
126 |
127 | /// Get the number of remote bytes read from a file's HDFS read statistics.
128 | ///
129 | /// #### Params
130 | /// * ```stats``` - HDFS read statistics for a file.
131 | ///
132 | /// #### Return
133 | /// Return the number of remote bytes read.
134 | pub fn hdfsReadStatisticsGetRemoteBytesRead(stats: *const hdfsReadStatistics) -> i64;
135 |
136 | /// Free some HDFS read statistics.
137 | ///
138 | /// #### Params
139 | /// * ```stats``` - The HDFS read statistics to free.
140 | pub fn hdfsFileFreeReadStatistics(stats: *mut hdfsReadStatistics);
141 |
142 | /// Connect to a hdfs file system as a specific user.
143 | ///
144 | /// #### Params
145 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details.
146 | /// * ```port``` - The port on which the server is listening.
147 | /// * ```user``` - The user name (this is a Hadoop domain user).
148 | /// Passing ```NULL``` is equivalent to hdfsConnect(host, port).
149 | ///
150 | /// #### Return
151 | /// Returns a handle to the filesystem or ```NULL``` on error.
152 | pub fn hdfsConnectAsUser(host: *const c_char, u16: u16, user: *const c_char) -> *const hdfsFS;
153 |
154 | /// Connect to a hdfs file system.
155 | ///
156 | /// This API is deprecated. Use hdfsBuilderConnect instead.
157 | ///
158 | /// #### Params
159 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details.
160 | /// * ```port``` - The port on which the server is listening.
161 | ///
162 | /// #### Return
163 | /// Returns a handle to the filesystem or ```NULL``` on error.
164 | pub fn hdfsConnect(host: *const c_char, u16: tPort) -> *const hdfsFS;
165 |
166 | /// Connect to an hdfs file system.
167 | ///
168 | /// Forces a new instance to be created. This API is deprecated.
169 | /// Use hdfsBuilderConnect instead.
170 | ///
171 | /// #### Params
172 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details.
173 | /// * ```port``` - The port on which the server is listening.
174 | /// * ```user``` - The user name to use when connecting
175 | ///
176 | /// #### Return
177 | /// Returns a handle to the filesystem or ```NULL``` on error.
178 | pub fn hdfsConnectAsUserNewInstance(
179 | host: *const c_char,
180 | u16: tPort,
181 | user: *const c_char,
182 | ) -> *const hdfsFS;
183 |
184 | /// Connect to an hdfs file system.
185 | ///
186 | /// Forces a new instance to be created. This API is deprecated.
187 | /// Use hdfsBuilderConnect instead.
188 | ///
189 | /// #### Params
190 | /// * ```nn``` - The NameNode. See hdfsBuilderSetNameNode for details.
191 | /// * ```port``` - The port on which the server is listening.
192 | ///
193 | /// #### Return
194 | /// Returns a handle to the filesystem or ```NULL``` on error.
195 | pub fn hdfsConnectNewInstance(host: *const c_char, u16: tPort) -> *const hdfsFS;
196 |
197 | /// Connect to HDFS using the parameters defined by the builder.
198 | ///
199 | /// The HDFS builder will be freed, whether or not the connection was successful.
200 | ///
201 | /// Every successful call to hdfsBuilderConnect should be matched with a call
202 | /// to hdfsDisconnect, when the hdfsFS is no longer needed.
203 | ///
204 | /// #### Params
205 | /// * ```bld``` - The HDFS builder
206 | ///
207 | /// #### Return
208 | /// Returns a handle to the filesystem, or ```NULL``` on error.
209 | pub fn hdfsBuilderConnect(bld: *mut hdfsBuilder) -> *const hdfsFS;
210 |
211 | /// Create an HDFS builder.
212 | ///
213 | /// #### Return
214 | /// The HDFS builder, or ```NULL``` on error.
215 | pub fn hdfsNewBuilder() -> *mut hdfsBuilder;
216 |
217 | /// Force the builder to always create a new instance of the FileSystem,
218 | /// rather than possibly finding one in the cache.
219 | ///
220 | /// #### Params
221 | /// * ```bld``` - The HDFS builder
222 | pub fn hdfsBuilderSetForceNewInstance(bld: *mut hdfsBuilder);
223 |
224 | /// Set the HDFS NameNode to connect to.
225 | ///
226 | /// #### Params
227 | /// * bld - The HDFS builder
228 | /// * nn - The NameNode to use. If the string given is 'default', the default NameNode
229 | /// configuration will be used (from the XML configuration files).
230 | /// If ```NULL``` is given, a LocalFileSystem will be created.
231 | /// If the string starts with a protocol type such as ```file://``` or
232 | /// ```hdfs://```, this protocol type will be used. If not, the
233 | /// ```hdfs://``` protocol type will be used.
234 | /// You may specify a NameNode port in the usual way by
235 | /// passing a string of the format ```hdfs://<hostname>:<port>```.
236 | /// Alternately, you may set the port with hdfsBuilderSetNameNodePort.
237 | /// However, you must not pass the port in two different ways.
238 | pub fn hdfsBuilderSetNameNode(bld: *mut hdfsBuilder, host: *const c_char);
239 |
240 | /// Set the port of the HDFS NameNode to connect to.
241 | ///
242 | /// #### Params
243 | /// * bld - The HDFS builder
244 | /// * port - The port.
245 | pub fn hdfsBuilderSetNameNodePort(bld: *mut hdfsBuilder, port: u16);
246 |
247 | /// Set the username to use when connecting to the HDFS cluster.
248 | ///
249 | /// #### Params
250 | /// * bld - The HDFS builder
251 | /// * userName - The user name. The string will be shallow-copied.
252 | pub fn hdfsBuilderSetUserName(bld: *mut hdfsBuilder, userName: *const c_char);
253 |
254 | /// Set the path to the Kerberos ticket cache to use when connecting to
255 | /// the HDFS cluster.
256 | ///
257 | /// #### Params
258 | /// * ```bld``` - The HDFS builder
259 | /// * ```kerbTicketCachePath``` - The Kerberos ticket cache path. The string
260 | /// will be shallow-copied.
261 | pub fn hdfsBuilderSetKerbTicketCachePath(
262 | bld: *mut hdfsBuilder,
263 | kerbTicketCachePath: *const c_char,
264 | );
265 |
266 | /// Free an HDFS builder.
267 | ///
268 | /// It is normally not necessary to call this function since
269 | /// hdfsBuilderConnect frees the builder.
270 | ///
271 | /// #### Params
272 | /// * ```bld``` - The HDFS builder
273 | pub fn hdfsFreeBuilder(bld: *mut hdfsBuilder);
274 |
275 | /// Set a configuration string for an HdfsBuilder.
276 | ///
277 | /// #### Params
278 | /// * ```key``` - The key to set.
279 | /// * ```val``` - The value, or ```NULL``` to set no value.
280 | /// This will be shallow-copied. You are responsible for
281 | /// ensuring that it remains valid until the builder is freed.
282 | ///
283 | /// #### Return
284 | /// 0 on success; nonzero error code otherwise.
285 | pub fn hdfsBuilderConfSetStr(
286 | bld: *mut hdfsBuilder,
287 | key: *const c_char,
288 | value: *const c_char,
289 | ) -> c_int;
290 |
291 | /// Get a configuration string.
292 | ///
293 | /// #### Params
294 | /// * ```key``` - The key to find
295 | /// * ```val``` - (out param) The value. This will be set to NULL if the
296 | /// key isn't found. You must free this string with
297 | /// ```hdfsConfStrFree```.
298 | ///
299 | /// #### Return
300 | /// 0 on success; nonzero error code otherwise.
301 | /// Failure to find the key is not an error.
302 | pub fn hdfsConfGetStr(value: *const c_char, val: *mut *mut c_char) -> c_int;
303 |
304 | /// Get a configuration integer.
305 | ///
306 | /// #### Params
307 | /// * ```key``` - The key to find
308 | /// * ```val``` - (out param) The value. This will NOT be changed if the
309 | /// key isn't found.
310 | ///
311 | /// #### Return
312 | /// 0 on success; nonzero error code otherwise.
313 | /// Failure to find the key is not an error.
314 | pub fn hdfsConfGetInt(key: *const c_char, val: *mut i32) -> c_int;
315 |
316 | /// Free a configuration string found with hdfsConfGetStr.
317 | ///
318 | /// #### Params
319 | /// * ```val``` - A configuration string obtained from hdfsConfGetStr
320 | pub fn hdfsConfStrFree(val: *const c_char);
321 |
322 | /// hdfsDisconnect - Disconnect from the hdfs file system.
323 | /// Disconnect from hdfs.
324 | ///
325 | /// #### Params
326 | /// * ```fs``` - The configured filesystem handle.
327 | ///
328 | /// #### Return
329 | /// Returns 0 on success, -1 on error.
330 | /// Even if there is an error, the resources associated with the
331 | /// hdfsFS will be freed.
332 | pub fn hdfsDisconnect(fs: *const hdfsFS) -> c_int;
333 |
334 | /// Open a hdfs file in given mode.
335 | ///
336 | /// #### Params
337 | /// * ```fs``` - The configured filesystem handle.
338 | /// * ```file``` - The file handle.
339 | /// * ```flags``` - an ```|``` of ```bits/fcntl.h``` file flags -
340 | /// supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite
341 | /// i.e., implies O_TRUNC), O_WRONLY|O_APPEND. Other flags are generally
342 | /// ignored other than (O_RDWR || (O_EXCL & O_CREAT)) which return ```NULL``` and
343 | /// set errno to ENOTSUP.
344 | /// * ```bufferSize``` - Size of buffer for read/write - pass 0 if you want
345 | /// to use the default configured values.
346 | /// * ```replication``` Block replication - pass 0 if you want to use
347 | /// the default configured values.
348 | /// * ```blocksize``` - Size of block - pass 0 if you want to use the
349 | /// default configured values.
350 | ///
351 | /// #### Return
352 | /// Returns the handle to the open file, or ```NULL``` on error. On error,
353 | /// errno will be set appropriately. The returned handle should be passed to
354 | /// hdfsCloseFile when it is no longer needed.
355 | pub fn hdfsOpenFile(
356 | fs: *const hdfsFS,
357 | path: *const c_char,
358 | flags: c_int,
359 | bufferSize: c_int,
360 | replication: c_short,
361 | blocksize: i32,
362 | ) -> *const hdfsFile;
363 |
364 | /// Close an open file.
365 | ///
366 | /// #### Params
367 | /// * ```fs``` - The configured filesystem handle.
368 | /// * ```file``` - The file handle.
369 | ///
370 | /// #### Return
371 | /// Returns 0 on success, -1 on error. On error, errno will be set
372 | /// appropriately. If the hdfs file was valid, the memory associated
373 | /// with it will be freed at the end of this call, even if there was
374 | /// an I/O error.
375 | pub fn hdfsCloseFile(fs: *const hdfsFS, file: *const hdfsFile) -> c_int;
376 |
377 | /// Checks if a given path exists on the filesystem
378 | ///
379 | /// #### Params
380 | /// * ```fs``` - The configured filesystem handle.
381 | /// * ```path``` - The path to look for
382 | ///
383 | /// #### Return
384 | /// Returns 0 on success, -1 on error.
385 | pub fn hdfsExists(fs: *const hdfsFS, path: *const c_char) -> c_int;
386 |
387 | /// Seek to given offset in file.
388 | ///
389 | /// This works only for files opened in read-only mode.
390 | ///
391 | /// #### Params
392 | /// ```fs``` The configured filesystem handle.
393 | /// ```file``` The file handle.
394 | /// ```desiredPos``` Offset into the file to seek into.
395 | ///
396 | /// #### Return
397 | /// Returns 0 on success, -1 on error.
398 | pub fn hdfsSeek(fs: *const hdfsFS, file: *const hdfsFile, desiredPos: tOffset) -> c_int;
399 |
400 | /// Get the current offset in the file, in bytes.
401 | ///
402 | /// #### Params
403 | ///
404 | /// ```fs``` - The configured filesystem handle.
405 | /// ```file``` - The file handle.
406 | ///
407 | /// #### Return
408 | /// Current offset, -1 on error.
409 | pub fn hdfsTell(fs: *const hdfsFS, file: *const hdfsFile) -> tOffset;
410 |
411 | /// Read data from an open file.
412 | ///
413 | /// #### Params
414 | /// * ```fs``` - The configured filesystem handle.
415 | /// * ```file``` - The file handle.
416 | /// * ```buffer``` - The buffer to copy read bytes into.
417 | /// * ```length``` - The length of the buffer.
418 | ///
419 | /// #### Return
420 | /// On success, a positive number indicating how many bytes were read.
421 | /// On end-of-file, 0. On error, -1. Errno will be set to the error code.
422 | /// Just like the POSIX read function, hdfsRead will return -1
423 | /// and set errno to EINTR if data is temporarily unavailable,
424 | /// but we are not yet at the end of the file.
425 | pub fn hdfsRead(
426 | fs: *const hdfsFS,
427 | file: *const hdfsFile,
428 | buffer: *mut c_void,
429 | length: tSize,
430 | ) -> tSize;
431 |
432 | /// Positional read of data from an open file.
433 | ///
434 | /// #### Params
435 | /// * ```fs``` - The configured filesystem handle.
436 | /// * ```file``` - The file handle.
437 | /// * ```position``` - Position from which to read
438 | /// * ```buffer``` - The buffer to copy read bytes into.
439 | /// * ```length``` - The length of the buffer.
440 | ///
441 | /// #### Return
442 | /// See hdfsRead
443 | pub fn hdfsPread(
444 | fs: *const hdfsFS,
445 | file: *const hdfsFile,
446 | position: tOffset,
447 | buffer: *mut c_void,
448 | length: tSize,
449 | ) -> tSize;
450 |
451 | /// Write data into an open file.
452 | ///
453 | /// #### Params
454 | /// * ```fs``` - The configured filesystem handle.
455 | /// * ```file``` - The file handle.
456 | /// * ```buffer``` - The data.
457 | /// * ```length``` - The no. of bytes to write.
458 | ///
459 | /// #### Return
460 | /// the number of bytes written, -1 on error.
461 | pub fn hdfsWrite(
462 | fs: *const hdfsFS,
463 | file: *const hdfsFile,
464 | buffer: *const c_void,
465 | length: tSize,
466 | ) -> tSize;
467 |
468 | /// Flush the data.
469 | ///
470 | /// #### Params
471 | /// * ```fs``` - The configured filesystem handle.
472 | /// * ```file``` - The file handle.
473 | ///
474 | /// #### Return
475 | /// Returns 0 on success, -1 on error.
476 | pub fn hdfsFlush(fs: *const hdfsFS, file: *const hdfsFile) -> c_int;
477 |
478 | /// Flush out the data in client's user buffer. After the return of this
479 | /// call, new readers will see the data.
480 | ///
481 | /// #### Params
482 | /// * ```fs``` - The configured filesystem handle.
483 | /// * ```file``` - The file handle.
484 | ///
485 | /// #### Return
486 | /// 0 on success, -1 on error and sets errno
487 | pub fn hdfsHFlush(fs: *const hdfsFS, file: *const hdfsFile) -> c_int;
488 |
489 | /// Similar to posix fsync, flush out the data in the client's
490 | /// user buffer all the way to the disk device (but the disk may have
491 | /// it in its cache).
492 | ///
493 | /// #### Params
494 | /// * ```fs``` - The configured filesystem handle.
495 | /// * ```file``` - The file handle.
496 | ///
497 | /// #### Return
498 | /// 0 on success, -1 on error and sets errno
499 | pub fn hdfsHSync(fs: *const hdfsFS, file: *const hdfsFile) -> c_int;
500 |
501 | /// Number of bytes that can be read from this input stream without
502 | /// blocking.
503 | ///
504 | /// #### Params
505 | /// * ```fs``` - The configured filesystem handle.
506 | /// * ```file``` - The file handle.
507 | ///
508 | /// #### Return
509 | /// Returns the number of available bytes; -1 on error (errno will be set).
510 | pub fn hdfsAvailable(fs: *const hdfsFS, file: *const hdfsFile) -> c_int;
511 |
512 | /// Copy file from one filesystem to another.
513 | ///
514 | /// #### Params
515 | /// * ```srcFS``` - The handle to source filesystem.
516 | /// * ```src``` - The path of source file.
517 | /// * ```dstFS``` - The handle to destination filesystem.
518 | /// * ```dst``` - The path of destination file.
519 | ///
520 | /// #### Return
521 | /// Returns 0 on success, -1 on error.
522 | pub fn hdfsCopy(
523 | srcFS: *const hdfsFS,
524 | src: *const c_char,
525 | dstFS: *const hdfsFS,
526 | dst: *const c_char,
527 | ) -> c_int;
528 |
529 | /// Move file from one filesystem to another.
530 | ///
531 | /// #### Params
532 | /// * ```srcFS``` - The handle to source filesystem.
533 | /// * ```src``` - The path of source file.
534 | /// * ```dstFS``` - The handle to destination filesystem.
535 | /// * ```dst``` - The path of destination file.
536 | ///
537 | /// #### Return
538 | /// Returns 0 on success, -1 on error.
539 | pub fn hdfsMove(
540 | srcFS: *const hdfsFS,
541 | src: *const c_char,
542 | dstFS: *const hdfsFS,
543 | dst: *const c_char,
544 | ) -> c_int;
545 |
546 | /// Delete file.
547 | ///
548 | /// #### Params
549 | /// * ```fs``` - The configured filesystem handle.
550 | /// * ```path``` - The path of the file.
551 | /// * ```recursive``` - if path is a directory and set to
552 | /// non-zero, the directory is deleted else throws an exception. In
553 | /// case of a file the recursive argument is irrelevant.
554 | ///
555 | /// #### Return
556 | /// Returns 0 on success, -1 on error.
557 | pub fn hdfsDelete(fs: *const hdfsFS, path: *const c_char, recursive: c_int) -> c_int;
558 |
559 | /// Rename file.
560 | ///
561 | /// #### Params
562 | /// * ```fs``` - The configured filesystem handle.
563 | /// * ```oldPath``` - The path of the source file.
564 | /// * ```newPath``` - The path of the destination file.
565 | ///
566 | /// #### Return
567 | /// Returns 0 on success, -1 on error.
568 | pub fn hdfsRename(fs: *const hdfsFS, oldPath: *const c_char, newPath: *const c_char) -> c_int;
569 |
570 | /// Get the current working directory for the given filesystem.
571 | ///
572 | /// #### Params
573 | /// * ```fs``` - The configured filesystem handle.
574 | /// * ```buffer``` - The user-buffer to copy path of cwd into.
575 | /// * ```bufferSize``` - The length of user-buffer.
576 | ///
577 | /// #### Return
578 | /// Returns buffer, ```NULL``` on error.
579 | pub fn hdfsGetWorkingDirectory(
580 | fs: *const hdfsFS,
581 | buffer: *mut c_char,
582 | bufferSize: size_t,
583 | ) -> *mut c_char;
584 |
585 | /// Set the working directory. All relative paths will be resolved relative
586 | /// to it.
587 | ///
588 | /// #### Params
589 | /// * ```fs``` - The configured filesystem handle.
590 | /// * ```path``` - The path of the new 'cwd'.
591 | ///
592 | /// #### Return
593 | /// Returns 0 on success, -1 on error.
594 | pub fn hdfsSetWorkingDirectory(fs: *const hdfsFS, path: *const c_char) -> c_int;
595 |
596 | /// Make the given file and all non-existent parents into directories.
597 | ///
598 | /// #### Params
599 | /// * ```fs``` - The configured filesystem handle.
600 | /// * ```path``` - The path of the directory.
601 | ///
602 | /// #### Return
603 | /// Returns 0 on success, -1 on error.
604 | pub fn hdfsCreateDirectory(fs: *const hdfsFS, path: *const c_char) -> c_int;
605 |
606 | /// Set the replication of the specified file to the supplied value
607 | ///
608 | /// #### Params
609 | /// * ```fs``` The configured filesystem handle.
610 | /// * ```path``` The path of the file.
611 | ///
612 | /// #### Return
613 | /// Returns 0 on success, -1 on error.
614 | pub fn hdfsSetReplication(fs: *const hdfsFS, path: *const c_char, replication: i16) -> c_int;
615 |
616 | /// Get list of files/directories for a given directory-path.
617 | /// hdfsFreeFileInfo should be called to deallocate memory.
618 | ///
619 | /// #### Params
620 | /// * ```fs``` - The configured filesystem handle.
621 | /// * ```path``` - The path of the directory.
622 | /// * ```numEntries``` - Set to the number of files/directories in path.
623 | ///
624 | /// #### Return
625 | /// Returns a dynamically-allocated array of hdfsFileInfo objects; ```NULL``` on
626 | /// error.
627 | pub fn hdfsListDirectory(
628 | fs: *const hdfsFS,
629 | path: *const c_char,
630 | numEntries: *mut c_int,
631 | ) -> *const hdfsFileInfo;
632 |
633 | /// Get information about a path as a (dynamically allocated) single
634 | /// hdfsFileInfo struct. hdfsFreeFileInfo should be called when the
635 | /// pointer is no longer needed.
636 | ///
637 | /// #### Params
638 | /// * ```fs``` - The configured filesystem handle.
639 | /// * ```path``` The path of the file.
640 | ///
641 | /// #### Params
642 | /// Returns a dynamically-allocated hdfsFileInfo object; ```NULL``` on error.
643 | pub fn hdfsGetPathInfo(fs: *const hdfsFS, path: *const c_char) -> *const hdfsFileInfo;
644 |
645 | /// Free up the hdfsFileInfo array (including fields)
646 | ///
647 | /// #### Params
648 | /// * ```hdfsFileInfo``` The array of dynamically-allocated hdfsFileInfo objects.
649 | /// * ```numEntries``` The size of the array.
650 | pub fn hdfsFreeFileInfo(hdfsFileInfo: *const hdfsFileInfo, numEntries: c_int);
651 |
652 | /// hdfsFileIsEncrypted: determine if a file is encrypted based on its
653 | /// hdfsFileInfo.
654 | ///
655 | /// #### Return
656 | /// -1 if there was an error (errno will be set), 0 if the file is
657 | /// not encrypted, 1 if the file is encrypted.
658 | pub fn hdfsFileIsEncrypted(hdfsFileInfo: *const hdfsFileInfo) -> c_int;
659 |
660 | /// Get hostnames where a particular block (determined by pos & blocksize)
661 | /// of a file is stored. The last element in the array is ```NULL```.
662 | /// Due to replication, a single block could be present on multiple hosts.
663 | ///
664 | /// #### Params
665 | /// * ```fs``` The configured filesystem handle.
666 | /// * ```path``` - The path of the file.
667 | /// * ```start``` - The start of the block.
668 | /// * ```length``` - The length of the block.
669 | ///
670 | /// #### Return
671 | /// Returns a dynamically-allocated 2-d array of blocks-hosts; ```NULL```
672 | /// on error.
673 | pub fn hdfsGetHosts(
674 | fs: *const hdfsFS,
675 | path: *const c_char,
676 | start: tOffset,
677 | length: tOffset,
678 | ) -> *const *const *const c_char;
679 |
680 | /// Free up the structure returned by hdfsGetHosts
681 | ///
682 | /// #### Params
683 | /// * ```blockHosts``` - The dynamically-allocated 2-d array of
684 | /// block-hosts returned by hdfsGetHosts; the entire
685 | /// structure is freed.
686 | pub fn hdfsFreeHosts(blockHosts: *const *const *const c_char);
687 |
688 | /// Get the default blocksize.
689 | ///
690 | /// This API is deprecated. Use hdfsGetDefaultBlockSizeAtPath instead.
691 | ///
692 | /// #### Params
693 | /// * ```fs``` - The configured filesystem handle.
694 | ///
695 | /// #### Return
696 | /// Returns the default blocksize, or -1 on error.
697 | pub fn hdfsGetDefaultBlockSize(fs: *const hdfsFS) -> tOffset;
698 |
699 | /// Get the default blocksize at the filesystem indicated by a given path.
700 | ///
701 | /// #### Params
702 | /// * ```fs``` - The configured filesystem handle.
703 | /// * ```path``` - The given path will be used to locate the actual
704 | /// filesystem. The full path does not have to exist.
705 | ///
706 | /// #### Return
707 | /// Returns the default blocksize, or -1 on error.
708 | pub fn hdfsGetDefaultBlockSizeAtPath(fs: *const hdfsFS, path: *const c_char) -> tOffset;
709 |
710 | /// Return the raw capacity of the filesystem.
711 | ///
712 | /// #### Params
713 | /// * ```fs``` - The configured filesystem handle.
714 | ///
715 | /// #### Return
716 | /// Returns the raw-capacity; -1 on error.
717 | pub fn hdfsGetCapacity(fs: *const hdfsFS) -> tOffset;
718 |
719 | /// Return the total raw size of all files in the filesystem.
720 | ///
721 | /// #### Params
722 | /// * ```fs``` - The configured filesystem handle.
723 | /// #### Return
724 | /// Returns the total-size; -1 on error.
725 | pub fn hdfsGetUsed(fs: *const hdfsFS) -> tOffset;
726 |
727 | /// Change the user and/or group of a file or directory.
728 | ///
729 | /// #### Params
730 | /// * ```fs``` - The configured filesystem handle.
731 | /// * ```path``` - the path to the file or directory
732 | /// * ```owner``` - User string. Set to ```NULL``` for 'no change'
733 | /// * ```group``` - Group string. Set to ```NULL``` for 'no change'
734 | ///
735 | /// #### Return
736 | /// 0 on success else -1
737 | pub fn hdfsChown(
738 | fs: *const hdfsFS,
739 | path: *const c_char,
740 | owner: *const c_char,
741 | group: *const c_char,
742 | ) -> c_int;
743 |
744 | /// hdfsChmod
745 | ///
746 | /// #### Params
747 | /// * ```fs``` - The configured filesystem handle.
748 | /// * ```path``` - the path to the file or directory
749 | ///
750 | /// #### Return
751 | /// 0 on success else -1
752 | pub fn hdfsChmod(fs: *const hdfsFS, path: *const c_char, mode: c_short) -> c_int;
753 |
754 | /// hdfsUtime
755 | ///
756 | /// #### Params
757 | /// * ```fs``` - The configured filesystem handle.
758 | /// * ```path``` - the path to the file or directory
759 | /// * ```mtime``` - new modification time or -1 for no change
760 | /// * ```atime``` - new access time or -1 for no change
761 | ///
762 | /// #### Return
763 | /// 0 on success else -1
764 | pub fn hdfsUtime(fs: *const hdfsFS, path: *const c_char, mtime: tTime, atime: tTime) -> c_int;
765 |
766 | /// Allocate a zero-copy options structure.
767 | ///
768 | /// You must free all options structures allocated with this function using
769 | /// hadoopRzOptionsFree.
770 | ///
771 | /// #### Return
772 | /// A zero-copy options structure, or ```NULL``` if one could not be allocated.
773 | /// If ```NULL``` is returned, errno will contain the error number.
774 | pub fn hadoopRzOptionsAlloc() -> *const hadoopRzOptions;
775 |
776 | /// Determine whether we should skip checksums in read0.
777 | ///
778 | /// #### Params
779 | /// * ```opts``` - The options structure.
780 | /// * ```skip``` - Nonzero to skip checksums sometimes; zero to always
781 | /// check them.
782 | ///
783 | /// #### Return
784 | /// 0 on success; -1 plus errno on failure.
785 | pub fn hadoopRzOptionsSetSkipChecksum(opts: *const hadoopRzOptions, skip: c_int) -> c_int;
786 |
787 | /// Set the ByteBufferPool to use with read0.
788 | ///
789 | /// #### Params
790 | /// * ```opts``` - The options structure.
791 | /// * ```className``` - If this is ```NULL```, we will not use any
792 | /// ByteBufferPool. If this is non-NULL, it will be
793 | /// treated as the name of the pool class to use.
794 | /// For example, you can use ELASTIC_BYTE_BUFFER_POOL_CLASS.
795 | ///
796 | /// #### Return
797 | /// 0 if the ByteBufferPool class was found and instantiated;
798 | /// -1 plus errno otherwise.
799 | pub fn hadoopRzOptionsSetByteBufferPool(
800 | opts: *const hadoopRzOptions,
801 | className: *const c_char,
802 | ) -> c_int;
803 |
804 | /// Free a hadoopRzOptionsFree structure.
805 | ///
806 | /// #### Params
807 | /// * ```opts``` - The options structure to free.
808 | /// Any associated ByteBufferPool will also be freed.
809 | pub fn hadoopRzOptionsFree(opts: *const hadoopRzOptions);
810 |
811 | /// Perform a byte buffer read. If possible, this will be a zero-copy
812 | /// (mmap) read.
813 | ///
814 | /// #### Params
815 | /// * ```file``` - The file to read from.
816 | /// * ```opts``` - An options structure created by hadoopRzOptionsAlloc.
817 | /// * ```maxLength``` - The maximum length to read. We may read fewer bytes
818 | /// than this length.
819 | ///
820 | /// #### Return
821 | /// On success, we will return a new hadoopRzBuffer. This buffer will
822 | /// continue to be valid and readable until it is released by
823 | /// readZeroBufferFree. Failure to release a buffer will lead to a memory
824 | /// leak. You can access the data within the hadoopRzBuffer with
825 | /// hadoopRzBufferGet. If you have reached EOF, the data within the
826 | /// hadoopRzBuffer will be ```NULL```. You must still free hadoopRzBuffer
827 | /// instances containing ```NULL```.
828 | ///
829 | /// On failure, we will return ```NULL``` plus an errno code.
830 | /// ```errno = EOPNOTSUPP``` indicates that we could not do a zero-copy
831 | /// read, and there was no ByteBufferPool supplied.
832 | pub fn hadoopReadZero(
833 | file: *const hdfsFile,
834 | opts: *const hadoopRzOptions,
835 | maxLength: i32,
836 | ) -> *const hadoopRzBuffer;
837 |
838 | /// Determine the length of the buffer returned from readZero.
839 | ///
840 | /// #### Params
841 | /// * ```buffer``` - a buffer returned from readZero.
842 | ///
843 | /// #### Return
844 | /// the length of the buffer.
845 | pub fn hadoopRzBufferLength(buffer: *const hadoopRzBuffer) -> i32;
846 |
847 | /// Get a pointer to the raw buffer returned from readZero.
848 | ///
849 | /// #### Params
850 | /// * ```buffer``` - a buffer returned from readZero.
851 | ///
852 | /// #### Return
853 | /// a pointer to the start of the buffer. This will be ```NULL``` when
854 | /// end-of-file has been reached.
855 | pub fn hadoopRzBufferGet(buffer: *const hadoopRzBuffer) -> *const c_void;
856 |
857 | /// Release a buffer obtained through readZero.
858 | ///
859 | /// #### Params
860 | /// * ```file``` - The hdfs stream that created this buffer. This must be
861 | /// the same stream you called hadoopReadZero on.
862 | /// * ```buffer``` - The buffer to release, as returned by hadoopReadZero.
863 | /// The buffer must not be used after this call.
864 | ///
865 | pub fn hadoopRzBufferFree(file: *const hdfsFile, buffer: *const hadoopRzBuffer);
866 | }
867 |
--------------------------------------------------------------------------------
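
The builder functions above are the path `HdfsRegistry::get` takes in lib.rs. As a reminder of the calling convention (raw pointers, NULL checks, C strings), here is a minimal unsafe sketch; the host and port are placeholders and `connect_raw` is an invented wrapper, not part of the crate.

```rust
// Minimal sketch of the raw builder flow wrapped by HdfsRegistry::get.
// The returned pointer must be checked for NULL, and the filesystem should
// eventually be released with hdfsDisconnect.
use std::ffi::CString;

use hdfs_native::raw::{
    hdfsBuilderConnect, hdfsBuilderSetNameNode, hdfsBuilderSetNameNodePort, hdfsFS, hdfsNewBuilder,
};

unsafe fn connect_raw(host: &str, port: u16) -> Option<*const hdfsFS> {
    let builder = hdfsNewBuilder();
    let c_host = CString::new(host).ok()?;
    hdfsBuilderSetNameNode(builder, c_host.as_ptr());
    hdfsBuilderSetNameNodePort(builder, port);
    // hdfsBuilderConnect frees the builder whether or not it succeeds.
    let fs = hdfsBuilderConnect(builder);
    if fs.is_null() {
        None
    } else {
        Some(fs)
    }
}
```
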
/src/util.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::str;
19 |
20 | use crate::dfs::HdfsFs;
21 | use crate::err::HdfsErr;
22 | use crate::raw::*;
23 |
24 | #[macro_export]
25 | macro_rules! to_raw {
26 | ($str:expr) => {{
27 | let c_str = std::ffi::CString::new($str).unwrap();
28 | c_str.into_raw()
29 | }};
30 | }
31 |
32 | #[macro_export]
33 | macro_rules! from_raw {
34 | ($chars:expr) => {{
35 | let slice = unsafe { std::ffi::CStr::from_ptr($chars) }.to_bytes();
36 | std::str::from_utf8(slice).unwrap()
37 | }};
38 | }
39 |
40 | // pub fn chars_to_str<'a>(chars: *const c_char) -> &'a str {
41 | // let slice = unsafe { CStr::from_ptr(chars) }.to_bytes();
42 | // str::from_utf8(slice).unwrap()
43 | // }
44 |
45 | #[macro_export]
46 | macro_rules! b2i {
47 | ($b:expr) => {{
48 | if $b {
49 | 1
50 | } else {
51 | 0
52 | }
53 | }};
54 | }
55 |
56 | /// Hdfs Utility
57 | pub struct HdfsUtil;
58 |
59 | /// HDFS Utility
60 | impl HdfsUtil {
61 | /// Copy file from one filesystem to another.
62 | ///
63 | /// #### Params
64 | /// * ```srcFS``` - The handle to source filesystem.
65 | /// * ```src``` - The path of source file.
66 | /// * ```dstFS``` - The handle to destination filesystem.
67 | /// * ```dst``` - The path of destination file.
68 | pub fn copy(src_fs: &HdfsFs, src: &str, dst_fs: &HdfsFs, dst: &str) -> Result<bool, HdfsErr> {
69 | let res = unsafe { hdfsCopy(src_fs.raw(), to_raw!(src), dst_fs.raw(), to_raw!(dst)) };
70 |
71 | if res == 0 {
72 | Ok(true)
73 | } else {
74 | Err(HdfsErr::Unknown)
75 | }
76 | }
77 |
78 | /// Move file from one filesystem to another.
79 | ///
80 | /// #### Params
81 | /// * ```srcFS``` - The handle to source filesystem.
82 | /// * ```src``` - The path of source file.
83 | /// * ```dstFS``` - The handle to destination filesystem.
84 | /// * ```dst``` - The path of destination file.
85 | pub fn mv(src_fs: &HdfsFs, src: &str, dst_fs: &HdfsFs, dst: &str) -> Result<bool, HdfsErr> {
86 | let res = unsafe { hdfsMove(src_fs.raw(), to_raw!(src), dst_fs.raw(), to_raw!(dst)) };
87 |
88 | if res == 0 {
89 | Ok(true)
90 | } else {
91 | Err(HdfsErr::Unknown)
92 | }
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
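
HdfsUtil wraps the cross-filesystem hdfsCopy/hdfsMove calls. A hedged usage sketch follows: `replicate` is an invented helper, the cluster addresses are placeholders, and it assumes libhdfs3 accepts fully-qualified `hdfs://` URIs for the source and destination paths, which is not verified here.

```rust
// Sketch: copy a file between two clusters through HdfsUtil::copy.
// Assumption: libhdfs3 accepts fully-qualified hdfs:// URIs as paths here.
use hdfs_native::{HdfsErr, HdfsRegistry, HdfsUtil};

fn replicate(src: &str, dst: &str) -> Result<bool, HdfsErr> {
    let registry = HdfsRegistry::new();
    let src_fs = registry.get(src)?; // e.g. "hdfs://nn-a:9000/data/part-0.parquet"
    let dst_fs = registry.get(dst)?; // e.g. "hdfs://nn-b:9000/backup/part-0.parquet"
    HdfsUtil::copy(&src_fs, src, &dst_fs, dst)
}
```
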