├── NOTICE.txt ├── .github ├── dependabot.yml ├── ISSUE_TEMPLATE │ ├── question.md │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── dev.yml │ ├── take.yml │ ├── audit.yml │ ├── rust.yml │ └── ci.yml ├── dev └── release │ ├── rat_exclude_files.txt │ ├── run-rat.sh │ ├── check-rat-report.py │ ├── remove-old-artifacts.sh │ ├── release-tarball.sh │ ├── update_change_log.sh │ ├── verify-release-candidate.sh │ ├── create-tarball.sh │ └── README.md ├── .cargo └── config.toml ├── src ├── client │ ├── http │ │ ├── mod.rs │ │ ├── spawn.rs │ │ └── body.rs │ ├── dns.rs │ ├── parts.rs │ ├── pagination.rs │ ├── list.rs │ ├── mock_server.rs │ ├── s3.rs │ ├── header.rs │ ├── token.rs │ └── backoff.rs ├── aws │ ├── checksum.rs │ ├── resolve.rs │ └── precondition.rs ├── signer.rs ├── tags.rs ├── list.rs ├── multipart.rs ├── config.rs ├── path │ └── parts.rs ├── attributes.rs ├── chunked.rs ├── delimited.rs ├── prefix.rs └── payload.rs ├── .github_changelog_generator ├── deny.toml ├── .asf.yaml ├── .gitignore ├── tests ├── http.rs └── get_range_file.rs ├── Cargo.toml ├── README.md └── CONTRIBUTING.md /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Apache Arrow Object Store 2 | Copyright 2020-2024 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 
6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | target-branch: main 9 | labels: [ auto-dependencies ] 10 | - package-ecosystem: "github-actions" 11 | directory: "/" 12 | schedule: 13 | interval: "daily" 14 | open-pull-requests-limit: 10 15 | labels: [ auto-dependencies ] 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask question about this project 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Which part is this question about** 11 | 14 | 15 | **Describe your question** 16 | 19 | 20 | **Additional context** 21 | 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 14 | 15 | **To Reproduce** 16 | 19 | 20 | **Expected behavior** 21 | 24 | 25 | **Additional context** 26 | -------------------------------------------------------------------------------- /dev/release/rat_exclude_files.txt: -------------------------------------------------------------------------------- 1 | venv/* 2 | testing/* 3 | target/* 4 | dev/release/rat_exclude_files.txt 5 | arrow/test/data/* 6 | arrow-csv/test/data/* 7 | arrow-json/test/data/* 8 | arrow/test/dependency/* 9 | arrow-integration-test/data/* 10 | parquet_derive/test/dependency/* 11 | .gitattributes 12 | 
**.gitignore 13 | .gitmodules 14 | Cargo.lock 15 | filtered_rat.txt 16 | rat.txt 17 | # auto-generated 18 | arrow-flight/src/arrow.flight.protocol.rs 19 | arrow-flight/src/sql/arrow.flight.protocol.sql.rs 20 | .github/* 21 | parquet/src/bin/parquet-fromcsv-help.txt 22 | arrow-flight/examples/data/* 23 | -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [target.wasm32-unknown-unknown] 19 | rustflags = ['--cfg', 'getrandom_backend="wasm_js"'] -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem or challenge? 
Please describe what you are trying to do.** 11 | 15 | 16 | **Describe the solution you'd like** 17 | 20 | 21 | **Describe alternatives you've considered** 22 | 25 | 26 | **Additional context** 27 | 30 | -------------------------------------------------------------------------------- /src/client/http/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! HTTP client abstraction 19 | 20 | mod body; 21 | pub use body::*; 22 | 23 | mod connection; 24 | pub use connection::*; 25 | 26 | mod spawn; 27 | pub use spawn::*; 28 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Which issue does this PR close? 2 | 3 | 6 | 7 | Closes #. 8 | 9 | # Rationale for this change 10 | 11 | 15 | 16 | # What changes are included in this PR? 17 | 18 | 21 | 22 | # Are there any user-facing changes? 
23 | 24 | 25 | 28 | 29 | 32 | -------------------------------------------------------------------------------- /.github_changelog_generator: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | # Add special sections for documentation, security and performance 22 | add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]},"security":{"prefix":"**Security updates:**","labels":["security"]},"performance":{"prefix":"**Performance improvements:**","labels":["performance"]}} 23 | # so that the component is shown associated with the issue 24 | issue-line-labels=object-store 25 | # skip non object_store issues 26 | exclude-labels=development-process,invalid 27 | breaking_labels=api-change 28 | -------------------------------------------------------------------------------- /.github/workflows/dev.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: dev 19 | 20 | concurrency: 21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 22 | cancel-in-progress: true 23 | 24 | # trigger for all PRs and changes to main 25 | on: 26 | push: 27 | branches: 28 | - main 29 | pull_request: 30 | 31 | jobs: 32 | 33 | rat: 34 | name: Release Audit Tool (RAT) 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: actions/checkout@v6 38 | - name: Setup Python 39 | uses: actions/setup-python@v6 40 | with: 41 | python-version: 3.8 42 | - name: Audit licenses 43 | run: ./dev/release/run-rat.sh . -------------------------------------------------------------------------------- /.github/workflows/take.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Assign the issue via a `take` comment 19 | on: 20 | issue_comment: 21 | types: created 22 | 23 | permissions: 24 | issues: write 25 | 26 | jobs: 27 | issue_assign: 28 | if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' 29 | runs-on: ubuntu-latest 30 | steps: 31 | - uses: actions/github-script@v8 32 | with: 33 | script: | 34 | github.rest.issues.addAssignees({ 35 | owner: context.repo.owner, 36 | repo: context.repo.repo, 37 | issue_number: context.issue.number, 38 | assignees: [context.payload.comment.user.login], 39 | }) 40 | -------------------------------------------------------------------------------- /.github/workflows/audit.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: audit 19 | 20 | concurrency: 21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 22 | cancel-in-progress: true 23 | 24 | # trigger for all PRs that touch certain files and changes to main 25 | on: 26 | push: 27 | branches: 28 | - main 29 | pull_request: 30 | paths: 31 | - '**/Cargo.toml' 32 | - '**/Cargo.lock' 33 | 34 | jobs: 35 | cargo-audit: 36 | name: Audit 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v6 40 | - name: Install cargo-audit 41 | run: cargo install cargo-audit 42 | - name: Run audit check 43 | run: cargo audit 44 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | # Configuration documentation: 19 | #  https://embarkstudios.github.io/cargo-deny/index.html 20 | 21 | [advisories] 22 | vulnerability = "deny" 23 | yanked = "deny" 24 | unmaintained = "warn" 25 | notice = "warn" 26 | ignore = [ 27 | ] 28 | git-fetch-with-cli = true 29 | 30 | [licenses] 31 | default = "allow" 32 | unlicensed = "allow" 33 | copyleft = "allow" 34 | 35 | [bans] 36 | multiple-versions = "warn" 37 | deny = [ 38 | # We are using rustls as the TLS implementation, so we shouldn't be linking 39 | # in OpenSSL too. 40 | # 41 | # If you're hitting this, you might want to take a look at what new 42 | # dependencies you have introduced and check if there's a way to depend on 43 | # rustls instead of OpenSSL (tip: check the crate's feature flags). 44 | { name = "openssl-sys" } 45 | ] 46 | -------------------------------------------------------------------------------- /dev/release/run-rat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | RAT_VERSION=0.13 22 | 23 | # download apache rat 24 | if [ ! 
-f apache-rat-${RAT_VERSION}.jar ]; then 25 | curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar 26 | fi 27 | 28 | RAT="java -jar apache-rat-${RAT_VERSION}.jar -x " 29 | 30 | RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) 31 | 32 | # generate the rat report 33 | $RAT $1 > rat.txt 34 | python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt 35 | cat filtered_rat.txt 36 | UNAPPROVED=`cat filtered_rat.txt | grep "NOT APPROVED" | wc -l` 37 | 38 | if [ "0" -eq "${UNAPPROVED}" ]; then 39 | echo "No unapproved licenses" 40 | else 41 | echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt" 42 | exit 1 43 | fi 44 | -------------------------------------------------------------------------------- /.asf.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | # Documentation can be found here: 19 | # https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=127405038 20 | 21 | notifications: 22 | commits: commits@arrow.apache.org 23 | issues: github@arrow.apache.org 24 | pullrequests: github@arrow.apache.org 25 | discussions: github@arrow.apache.org 26 | jira_options: link label worklog 27 | github: 28 | description: "Rust object_store crate" 29 | homepage: https://crates.io/crates/object_store 30 | labels: 31 | - object-store 32 | enabled_merge_buttons: 33 | squash: true 34 | merge: false 35 | rebase: false 36 | features: 37 | issues: true 38 | discussions: true 39 | protected_branches: 40 | main: 41 | required_status_checks: 42 | # require branches to be up-to-date before merging 43 | strict: true 44 | # don't require any jobs to pass 45 | contexts: [] 46 | -------------------------------------------------------------------------------- /src/client/dns.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::net::ToSocketAddrs; 19 | 20 | use rand::prelude::SliceRandom; 21 | use reqwest::dns::{Addrs, Name, Resolve, Resolving}; 22 | use tokio::task::JoinSet; 23 | 24 | type DynErr = Box; 25 | 26 | #[derive(Debug)] 27 | pub(crate) struct ShuffleResolver; 28 | 29 | impl Resolve for ShuffleResolver { 30 | fn resolve(&self, name: Name) -> Resolving { 31 | Box::pin(async move { 32 | // use `JoinSet` to propagate cancelation 33 | let mut tasks = JoinSet::new(); 34 | tasks.spawn_blocking(move || { 35 | let it = (name.as_str(), 0).to_socket_addrs()?; 36 | let mut addrs = it.collect::>(); 37 | 38 | addrs.shuffle(&mut rand::rng()); 39 | 40 | Ok(Box::new(addrs.into_iter()) as Addrs) 41 | }); 42 | 43 | tasks 44 | .join_next() 45 | .await 46 | .expect("spawned on task") 47 | .map_err(|err| Box::new(err) as DynErr)? 48 | }) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | # workspace wide tests 19 | name: rust 20 | 21 | concurrency: 22 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 23 | cancel-in-progress: true 24 | 25 | # trigger for all PRs and changes to main 26 | on: 27 | push: 28 | branches: 29 | - main 30 | pull_request: 31 | 32 | jobs: 33 | # Run cargo fmt for all crates 34 | lint: 35 | name: Lint (cargo fmt) 36 | runs-on: ubuntu-latest 37 | container: 38 | image: amd64/rust 39 | steps: 40 | - uses: actions/checkout@v6 41 | - name: Setup rustfmt 42 | run: rustup component add rustfmt 43 | - name: Format object_store 44 | run: cargo fmt --all -- --check 45 | 46 | msrv: 47 | name: Verify MSRV (Minimum Supported Rust Version) 48 | runs-on: ubuntu-latest 49 | container: 50 | image: amd64/rust 51 | steps: 52 | - uses: actions/checkout@v6 53 | - name: Install cargo-msrv 54 | run: cargo install cargo-msrv 55 | - name: Check 56 | run: | 57 | # run `cargo msrv verify` to see problems 58 | cargo msrv verify --output-format=json || exit 1 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | target 3 | rusty-tags.vi 4 | .history 5 | .flatbuffers/ 6 | .idea/ 7 | .vscode 8 | .zed 9 | .devcontainer 10 | venv/* 11 | # created by doctests 12 | parquet/data.parquet 13 | # release notes cache 14 | .githubchangeloggenerator.cache 15 | .githubchangeloggenerator.cache.log 16 | justfile 17 | .prettierignore 18 | .env 19 | .editorconfig 20 | # local azurite file 21 | __azurite* 22 | __blobstorage__ 23 | 24 | # .bak files 25 | *.bak 26 | *.bak2 27 | # OS-specific .gitignores 28 | 29 | # Mac .gitignore 30 | # General 31 | .DS_Store 32 | .AppleDouble 33 | .LSOverride 34 | 35 | # Icon must end with two \r 36 | Icon 37 | 38 | # Thumbnails 39 | ._* 40 | 41 | # Files that might appear in the root of a volume 42 | .DocumentRevisions-V100 43 | .fseventsd 44 | 
.Spotlight-V100 45 | .TemporaryItems 46 | .Trashes 47 | .VolumeIcon.icns 48 | .com.apple.timemachine.donotpresent 49 | 50 | # Directories potentially created on remote AFP share 51 | .AppleDB 52 | .AppleDesktop 53 | Network Trash Folder 54 | Temporary Items 55 | .apdisk 56 | 57 | # Linux .gitignore 58 | *~ 59 | 60 | # temporary files which can be created if a process still has a handle open of a deleted file 61 | .fuse_hidden* 62 | 63 | # KDE directory preferences 64 | .directory 65 | 66 | # Linux trash folder which might appear on any partition or disk 67 | .Trash-* 68 | 69 | # .nfs files are created when an open file is removed but is still being accessed 70 | .nfs* 71 | 72 | # Windows .gitignore 73 | # Windows thumbnail cache files 74 | Thumbs.db 75 | Thumbs.db:encryptable 76 | ehthumbs.db 77 | ehthumbs_vista.db 78 | 79 | # Dump file 80 | *.stackdump 81 | 82 | # Folder config file 83 | [Dd]esktop.ini 84 | 85 | # Recycle Bin used on file shares 86 | $RECYCLE.BIN/ 87 | 88 | # Windows Installer files 89 | *.cab 90 | *.msi 91 | *.msix 92 | *.msm 93 | *.msp 94 | 95 | # Windows shortcuts 96 | *.lnk 97 | 98 | # Python virtual env in parquet crate 99 | parquet/pytest/venv/ 100 | __pycache__/ 101 | -------------------------------------------------------------------------------- /src/client/parts.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::multipart::PartId; 19 | use parking_lot::Mutex; 20 | 21 | /// An interior mutable collection of upload parts and their corresponding part index 22 | #[derive(Debug, Default)] 23 | pub(crate) struct Parts(Mutex>); 24 | 25 | impl Parts { 26 | /// Record the [`PartId`] for a given index 27 | /// 28 | /// Note: calling this method multiple times with the same `part_idx` 29 | /// will result in multiple [`PartId`] in the final output 30 | pub(crate) fn put(&self, part_idx: usize, id: PartId) { 31 | self.0.lock().push((part_idx, id)) 32 | } 33 | 34 | /// Produce the final list of [`PartId`] ordered by `part_idx` 35 | /// 36 | /// `expected` is the number of parts expected in the final result 37 | pub(crate) fn finish(&self, expected: usize) -> crate::Result> { 38 | let mut parts = self.0.lock(); 39 | if parts.len() != expected { 40 | return Err(crate::Error::Generic { 41 | store: "Parts", 42 | source: "Missing part".to_string().into(), 43 | }); 44 | } 45 | parts.sort_unstable_by_key(|(idx, _)| *idx); 46 | Ok(parts.drain(..).map(|(_, v)| v).collect()) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/aws/checksum.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::config::Parse; 19 | use std::str::FromStr; 20 | 21 | #[allow(non_camel_case_types)] 22 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 23 | /// Enum representing checksum algorithm supported by S3. 24 | pub enum Checksum { 25 | /// SHA-256 algorithm. 26 | SHA256, 27 | } 28 | 29 | impl std::fmt::Display for Checksum { 30 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 31 | match &self { 32 | Self::SHA256 => write!(f, "sha256"), 33 | } 34 | } 35 | } 36 | 37 | impl FromStr for Checksum { 38 | type Err = (); 39 | 40 | fn from_str(s: &str) -> Result { 41 | match s.to_lowercase().as_str() { 42 | "sha256" => Ok(Self::SHA256), 43 | _ => Err(()), 44 | } 45 | } 46 | } 47 | 48 | impl TryFrom<&String> for Checksum { 49 | type Error = (); 50 | 51 | fn try_from(value: &String) -> Result { 52 | value.parse() 53 | } 54 | } 55 | 56 | impl Parse for Checksum { 57 | fn parse(v: &str) -> crate::Result { 58 | v.parse().map_err(|_| crate::Error::Generic { 59 | store: "Config", 60 | source: format!("\"{v}\" is not a valid checksum algorithm").into(), 61 | }) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /dev/release/check-rat-report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 
############################################################################## 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | ############################################################################## 20 | import fnmatch 21 | import re 22 | import sys 23 | import xml.etree.ElementTree as ET 24 | 25 | if len(sys.argv) != 3: 26 | sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % 27 | sys.argv[0]) 28 | sys.exit(1) 29 | 30 | exclude_globs_filename = sys.argv[1] 31 | xml_filename = sys.argv[2] 32 | 33 | globs = [line.strip() for line in open(exclude_globs_filename, "r")] 34 | 35 | tree = ET.parse(xml_filename) 36 | root = tree.getroot() 37 | resources = root.findall('resource') 38 | 39 | all_ok = True 40 | for r in resources: 41 | approvals = r.findall('license-approval') 42 | if not approvals or approvals[0].attrib['name'] == 'true': 43 | continue 44 | clean_name = re.sub('^[^/]+/', '', r.attrib['name']) 45 | excluded = False 46 | for g in globs: 47 | if fnmatch.fnmatch(clean_name, g): 48 | excluded = True 49 | break 50 | if not excluded: 51 | sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % ( 52 | clean_name, r.attrib['name'], 
approvals[0].attrib['name'])) 53 | all_ok = False 54 | 55 | if not all_ok: 56 | sys.exit(1) 57 | 58 | print('OK') 59 | sys.exit(0) 60 | -------------------------------------------------------------------------------- /dev/release/remove-old-artifacts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | # This script removes all RCs and all but the most recent versions of 22 | # object_store from svn. 
23 | # 24 | # The older versions are in SVN history as well as available on the 25 | # archive page https://archive.apache.org/dist/ 26 | # 27 | # See 28 | # https://infra.apache.org/release-download-pages.html 29 | 30 | set -e 31 | set -u 32 | set -o pipefail 33 | 34 | echo "Remove all RCs" 35 | dev_base_url=https://dist.apache.org/repos/dist/dev/arrow 36 | old_rcs=$( 37 | svn ls ${dev_base_url}/ | \ 38 | grep -E '^apache-arrow-object-store-rs-[0-9]' | \ 39 | sort --version-sort 40 | ) 41 | for old_rc in $old_rcs; do 42 | echo "Remove RC: ${old_rc}" 43 | svn \ 44 | delete \ 45 | -m "Remove old Apache Arrow Rust Object Store RC: ${old_rc}" \ 46 | ${dev_base_url}/${old_rc} 47 | done 48 | 49 | echo "Remove all but the most recent version" 50 | release_base_url="https://dist.apache.org/repos/dist/release/arrow" 51 | old_releases=$( 52 | svn ls ${release_base_url} | \ 53 | grep -E '^arrow-object-store-rs-[0-9\.]+' | \ 54 | sort --version-sort --reverse | \ 55 | tail -n +2 56 | ) 57 | for old_release_version in $old_releases; do 58 | echo "Remove old release: ${old_release_version}" 59 | svn \ 60 | delete \ 61 | -m "Remove Apache Arrow Rust Object Store release: ${old_release_version}" \ 62 | ${release_base_url}/${old_release_version} 63 | done 64 | -------------------------------------------------------------------------------- /src/signer.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Abstraction of signed URL generation for those object store implementations that support it 19 | 20 | use crate::{Result, path::Path}; 21 | use async_trait::async_trait; 22 | use reqwest::Method; 23 | use std::{fmt, time::Duration}; 24 | use url::Url; 25 | 26 | /// Universal API to generate presigned URLs from multiple object store services. 27 | #[async_trait] 28 | pub trait Signer: Send + Sync + fmt::Debug + 'static { 29 | /// Given the intended [`Method`] and [`Path`] to use and the desired length of time for which 30 | /// the URL should be valid, return a signed [`Url`] created with the object store 31 | /// implementation's credentials such that the URL can be handed to something that doesn't have 32 | /// access to the object store's credentials, to allow limited access to the object store. 33 | async fn signed_url(&self, method: Method, path: &Path, expires_in: Duration) -> Result; 34 | 35 | /// Generate signed urls for multiple paths. 36 | /// 37 | /// See [`Signer::signed_url`] for more details. 
38 | async fn signed_urls( 39 | &self, 40 | method: Method, 41 | paths: &[Path], 42 | expires_in: Duration, 43 | ) -> Result> { 44 | let mut urls = Vec::with_capacity(paths.len()); 45 | for path in paths { 46 | urls.push(self.signed_url(method.clone(), path, expires_in).await?); 47 | } 48 | Ok(urls) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /tests/http.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Tests the HTTP store implementation 19 | 20 | #[cfg(feature = "http")] 21 | use object_store::{GetOptions, GetRange, ObjectStore, http::HttpBuilder, path::Path}; 22 | 23 | #[cfg(all(feature = "http", target_arch = "wasm32", target_os = "unknown"))] 24 | use wasm_bindgen_test::*; 25 | 26 | /// Tests that even when reqwest has the `gzip` feature enabled, the HTTP store 27 | /// does not error on a missing `Content-Length` header. 
28 | #[tokio::test] 29 | #[cfg(feature = "http")] 30 | async fn test_http_store_gzip() { 31 | let http_store = HttpBuilder::new() 32 | .with_url("https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main") 33 | .build() 34 | .unwrap(); 35 | 36 | let _ = http_store 37 | .get_opts( 38 | &Path::parse("LICENSE.txt").unwrap(), 39 | GetOptions::new().with_range(Some(GetRange::Bounded(0..100))), 40 | ) 41 | .await 42 | .unwrap(); 43 | } 44 | 45 | #[cfg(all(feature = "http", target_arch = "wasm32", target_os = "unknown"))] 46 | #[wasm_bindgen_test] 47 | async fn basic_wasm_get() { 48 | let http_store = HttpBuilder::new() 49 | .with_url("https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main") 50 | .build() 51 | .unwrap(); 52 | 53 | let _ = http_store 54 | .get_opts( 55 | &Path::parse("LICENSE.txt").unwrap(), 56 | GetOptions::new().with_range(Some(GetRange::Bounded(0..100))), 57 | ) 58 | .await 59 | .unwrap(); 60 | } 61 | -------------------------------------------------------------------------------- /src/tags.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use url::form_urlencoded::Serializer; 19 | 20 | /// A collection of key value pairs used to annotate objects 21 | /// 22 | /// 23 | /// 24 | #[derive(Debug, Clone, Default, Eq, PartialEq)] 25 | pub struct TagSet(String); 26 | 27 | impl TagSet { 28 | /// Append a key value pair to this [`TagSet`] 29 | /// 30 | /// Stores have different restrictions on what characters are permitted, 31 | /// for portability it is recommended applications use no more than 10 tags, 32 | /// and stick to alphanumeric characters, and `+ - = . _ : /` 33 | /// 34 | /// 35 | /// 36 | pub fn push(&mut self, key: &str, value: &str) { 37 | Serializer::new(&mut self.0).append_pair(key, value); 38 | } 39 | 40 | /// Return this [`TagSet`] as a URL-encoded string 41 | pub fn encoded(&self) -> &str { 42 | &self.0 43 | } 44 | } 45 | 46 | #[cfg(test)] 47 | mod tests { 48 | use super::*; 49 | 50 | #[test] 51 | fn test_tag_set() { 52 | let mut set = TagSet::default(); 53 | set.push("test/foo", "value sdlks"); 54 | set.push("foo", " sdf _ /+./sd"); 55 | assert_eq!( 56 | set.encoded(), 57 | "test%2Ffoo=value+sdlks&foo=+sdf+_+%2F%2B.%2Fsd" 58 | ); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /dev/release/release-tarball.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | # This script copies a tarball from the "dev" area of the 22 | # dist.apache.arrow repository to the "release" area 23 | # 24 | # This script should only be run after the release has been approved 25 | # by the arrow PMC committee. 26 | # 27 | # See release/README.md for full release instructions 28 | # 29 | # Based in part on post-01-upload.sh from apache/arrow 30 | 31 | 32 | set -e 33 | set -u 34 | 35 | SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 36 | SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" 37 | 38 | if [ "$#" -ne 2 ]; then 39 | echo "Usage: $0 " 40 | echo "ex. 
$0 0.4.0 1" 41 | exit 42 | fi 43 | 44 | version=$1 45 | rc=$2 46 | 47 | tmp_dir=tmp-apache-arrow-dist 48 | 49 | echo "Recreate temporary directory: ${tmp_dir}" 50 | rm -rf ${tmp_dir} 51 | mkdir -p ${tmp_dir} 52 | 53 | echo "Clone dev dist repository" 54 | svn \ 55 | co \ 56 | https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-object-store-rs-${version}-rc${rc} \ 57 | ${tmp_dir}/dev 58 | 59 | echo "Clone release dist repository" 60 | svn co https://dist.apache.org/repos/dist/release/arrow ${tmp_dir}/release 61 | 62 | echo "Copy ${version}-rc${rc} to release working copy" 63 | release_version=arrow-object-store-rs-${version} 64 | mkdir -p ${tmp_dir}/release/${release_version} 65 | cp -r ${tmp_dir}/dev/* ${tmp_dir}/release/${release_version}/ 66 | svn add ${tmp_dir}/release/${release_version} 67 | 68 | echo "Commit release" 69 | svn ci -m "Apache Arrow Rust Object Store ${version}" ${tmp_dir}/release 70 | 71 | echo "Clean up" 72 | rm -rf ${tmp_dir} 73 | 74 | echo "Success!" 75 | echo "The release is available here:" 76 | echo " https://dist.apache.org/repos/dist/release/arrow/${release_version}" 77 | 78 | echo "Clean up old artifacts from svn" 79 | "${SOURCE_TOP_DIR}"/dev/release/remove-old-artifacts.sh 80 | -------------------------------------------------------------------------------- /src/client/pagination.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::Result; 19 | use futures::Stream; 20 | use std::future::Future; 21 | 22 | /// Takes a paginated operation `op` that when called with: 23 | /// 24 | /// - A state `S` 25 | /// - An optional next token `Option` 26 | /// 27 | /// Returns 28 | /// 29 | /// - A response value `T` 30 | /// - The next state `S` 31 | /// - The next continuation token `Option` 32 | /// 33 | /// And converts it into a `Stream>` which will first call `op(state, None)`, and yield 34 | /// the returned response `T`. If the returned continuation token was `None` the stream will then 35 | /// finish, otherwise it will continue to call `op(state, token)` with the values returned by the 36 | /// previous call to `op`, until a continuation token of `None` is returned 37 | /// 38 | pub(crate) fn stream_paginated( 39 | client: C, 40 | state: S, 41 | op: F, 42 | ) -> impl Stream> 43 | where 44 | C: Clone, 45 | F: Fn(C, S, Option) -> Fut + Copy, 46 | Fut: Future)>>, 47 | { 48 | enum PaginationState { 49 | Start(T), 50 | HasMore(T, String), 51 | Done, 52 | } 53 | 54 | futures::stream::unfold(PaginationState::Start(state), move |state| { 55 | let client = client.clone(); 56 | async move { 57 | let (s, page_token) = match state { 58 | PaginationState::Start(s) => (s, None), 59 | PaginationState::HasMore(s, page_token) if !page_token.is_empty() => { 60 | (s, Some(page_token)) 61 | } 62 | _ => { 63 | return None; 64 | } 65 | }; 66 | 67 | let (resp, s, continuation) = match op(client, s, page_token).await { 68 | Ok(resp) => resp, 69 | 
Err(e) => return Some((Err(e), PaginationState::Done)), 70 | }; 71 | 72 | let next_state = match continuation { 73 | Some(token) => PaginationState::HasMore(s, token), 74 | None => PaginationState::Done, 75 | }; 76 | 77 | Some((Ok(resp), next_state)) 78 | } 79 | }) 80 | } 81 | -------------------------------------------------------------------------------- /src/aws/resolve.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use crate::aws::STORE; 19 | use crate::{ClientOptions, Result}; 20 | 21 | /// A specialized `Error` for object store-related errors 22 | #[derive(Debug, thiserror::Error)] 23 | enum Error { 24 | #[error("Bucket '{}' not found", bucket)] 25 | BucketNotFound { bucket: String }, 26 | 27 | #[error("Failed to resolve region for bucket '{}'", bucket)] 28 | ResolveRegion { 29 | bucket: String, 30 | source: reqwest::Error, 31 | }, 32 | 33 | #[error("Failed to parse the region for bucket '{}'", bucket)] 34 | RegionParse { bucket: String }, 35 | } 36 | 37 | impl From for crate::Error { 38 | fn from(source: Error) -> Self { 39 | Self::Generic { 40 | store: STORE, 41 | source: Box::new(source), 42 | } 43 | } 44 | } 45 | 46 | /// Get the bucket region using the [HeadBucket API]. This will fail if the bucket does not exist. 47 | /// 48 | /// [HeadBucket API]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html 49 | pub async fn resolve_bucket_region(bucket: &str, client_options: &ClientOptions) -> Result { 50 | use reqwest::StatusCode; 51 | 52 | let endpoint = format!("https://{bucket}.s3.amazonaws.com"); 53 | 54 | let client = client_options.client()?; 55 | 56 | let response = client.head(&endpoint).send().await.map_err(|source| { 57 | let bucket = bucket.into(); 58 | Error::ResolveRegion { bucket, source } 59 | })?; 60 | 61 | if response.status() == StatusCode::NOT_FOUND { 62 | let bucket = bucket.into(); 63 | return Err(Error::BucketNotFound { bucket }.into()); 64 | } 65 | 66 | let region = response 67 | .headers() 68 | .get("x-amz-bucket-region") 69 | .and_then(|x| x.to_str().ok()) 70 | .ok_or_else(|| Error::RegionParse { 71 | bucket: bucket.into(), 72 | })?; 73 | 74 | Ok(region.to_string()) 75 | } 76 | 77 | #[cfg(test)] 78 | mod tests { 79 | use super::*; 80 | 81 | #[tokio::test] 82 | async fn test_bucket_does_not_exist() { 83 | let bucket = "please-dont-exist"; 84 | 85 | let result = resolve_bucket_region(bucket, &ClientOptions::new()).await; 86 | 
87 | assert!(result.is_err()); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /dev/release/update_change_log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | # 20 | 21 | # invokes the changelog generator from 22 | # https://github.com/github-changelog-generator/github-changelog-generator 23 | # 24 | # With the config located in 25 | # arrow-rs-object-store/.github_changelog_generator 26 | # 27 | # Usage: 28 | # CHANGELOG_GITHUB_TOKEN= ./update_change_log.sh 29 | 30 | set -e 31 | 32 | SINCE_TAG="v0.12.4" 33 | FUTURE_RELEASE="v0.13.0" 34 | 35 | SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 36 | SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" 37 | 38 | OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG.md" 39 | 40 | # remove license header so github-changelog-generator has a clean base to append 41 | sed -i.bak '1,18d' "${OUTPUT_PATH}" 42 | 43 | pushd "${SOURCE_TOP_DIR}" 44 | docker run -it --rm -e CHANGELOG_GITHUB_TOKEN="$CHANGELOG_GITHUB_TOKEN" -v "$(pwd)":/usr/local/src/your-app githubchangeloggenerator/github-changelog-generator \ 45 | --user apache \ 46 | --project arrow-rs-object-store \ 47 | --cache-file=.githubchangeloggenerator.cache \ 48 | --cache-log=.githubchangeloggenerator.cache.log \ 49 | --http-cache \ 50 | --max-issues=600 \ 51 | --since-tag ${SINCE_TAG} \ 52 | --future-release ${FUTURE_RELEASE} 53 | 54 | sed -i.bak "s/\\\n/\n\n/" "${OUTPUT_PATH}" 55 | 56 | # Put license header back on 57 | echo ' 75 | ' | cat - "${OUTPUT_PATH}" > "${OUTPUT_PATH}".tmp 76 | mv "${OUTPUT_PATH}".tmp "${OUTPUT_PATH}" 77 | -------------------------------------------------------------------------------- /src/list.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Paginated Listing 19 | 20 | use super::Result; 21 | use crate::ListResult; 22 | use async_trait::async_trait; 23 | use std::borrow::Cow; 24 | 25 | /// Options for a paginated list request 26 | #[derive(Debug, Default, Clone)] 27 | pub struct PaginatedListOptions { 28 | /// Path to start listing from 29 | /// 30 | /// Note: Not all stores support this 31 | pub offset: Option, 32 | 33 | /// A delimiter use to group keys with a common prefix 34 | /// 35 | /// Note: Some stores only support `/` 36 | pub delimiter: Option>, 37 | 38 | /// The maximum number of paths to return 39 | pub max_keys: Option, 40 | 41 | /// A page token from a previous request 42 | /// 43 | /// Note: Behaviour is implementation defined if the previous request 44 | /// used a different prefix or options 45 | pub page_token: Option, 46 | 47 | /// Implementation-specific extensions. Intended for use by implementations 48 | /// that need to pass context-specific information (like tracing spans) via trait methods. 49 | /// 50 | /// These extensions are ignored entirely by backends offered through this crate. 
51 | pub extensions: http::Extensions, 52 | } 53 | 54 | /// A [`ListResult`] with optional pagination token 55 | #[derive(Debug)] 56 | pub struct PaginatedListResult { 57 | /// The list result 58 | pub result: ListResult, 59 | /// If result set truncated, the pagination token to fetch next results 60 | pub page_token: Option, 61 | } 62 | 63 | /// A low-level interface for interacting with paginated listing APIs 64 | /// 65 | /// Most use-cases should prefer [`ObjectStore::list`] as this is supported by more 66 | /// backends, including [`LocalFileSystem`], however, [`PaginatedListStore`] can be 67 | /// used where stateless pagination or non-path segment based listing is required 68 | /// 69 | /// [`ObjectStore::list`]: crate::ObjectStore::list 70 | /// [`LocalFileSystem`]: crate::local::LocalFileSystem 71 | #[async_trait] 72 | pub trait PaginatedListStore: Send + Sync + 'static { 73 | /// Perform a paginated list request 74 | /// 75 | /// Note: the order of returned objects is not guaranteed and 76 | /// unlike [`ObjectStore::list`] a trailing delimiter is not 77 | /// automatically added to `prefix` 78 | /// 79 | /// [`ObjectStore::list`]: crate::ObjectStore::list 80 | async fn list_paginated( 81 | &self, 82 | prefix: Option<&str>, 83 | opts: PaginatedListOptions, 84 | ) -> Result; 85 | } 86 | -------------------------------------------------------------------------------- /dev/release/verify-release-candidate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | case $# in 22 | 2) VERSION="$1" 23 | RC_NUMBER="$2" 24 | ;; 25 | *) echo "Usage: $0 X.Y.Z RC_NUMBER" 26 | exit 1 27 | ;; 28 | esac 29 | 30 | set -e 31 | set -x 32 | set -o pipefail 33 | 34 | SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" 35 | ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))" 36 | ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' 37 | 38 | download_dist_file() { 39 | curl \ 40 | --silent \ 41 | --show-error \ 42 | --fail \ 43 | --location \ 44 | --remote-name $ARROW_DIST_URL/$1 45 | } 46 | 47 | download_rc_file() { 48 | download_dist_file apache-arrow-object-store-rs-${VERSION}-rc${RC_NUMBER}/$1 49 | } 50 | 51 | import_gpg_keys() { 52 | download_dist_file KEYS 53 | gpg --import KEYS 54 | } 55 | 56 | if type shasum >/dev/null 2>&1; then 57 | sha256_verify="shasum -a 256 -c" 58 | sha512_verify="shasum -a 512 -c" 59 | else 60 | sha256_verify="sha256sum -c" 61 | sha512_verify="sha512sum -c" 62 | fi 63 | 64 | fetch_archive() { 65 | local dist_name=$1 66 | download_rc_file ${dist_name}.tar.gz 67 | download_rc_file ${dist_name}.tar.gz.asc 68 | download_rc_file ${dist_name}.tar.gz.sha256 69 | download_rc_file ${dist_name}.tar.gz.sha512 70 | gpg --verify ${dist_name}.tar.gz.asc ${dist_name}.tar.gz 71 | ${sha256_verify} ${dist_name}.tar.gz.sha256 72 | ${sha512_verify} ${dist_name}.tar.gz.sha512 73 | } 74 | 75 | setup_tempdir() { 76 | cleanup() { 77 | if [ "${TEST_SUCCESS}" = "yes" ]; then 78 | rm -fr "${ARROW_TMPDIR}" 79 | else 80 | echo "Failed to verify release candidate. 
See ${ARROW_TMPDIR} for details." 81 | fi 82 | } 83 | 84 | if [ -z "${ARROW_TMPDIR}" ]; then 85 | # clean up automatically if ARROW_TMPDIR is not defined 86 | ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX") 87 | trap cleanup EXIT 88 | else 89 | # don't clean up automatically 90 | mkdir -p "${ARROW_TMPDIR}" 91 | fi 92 | } 93 | 94 | test_source_distribution() { 95 | # install rust toolchain in a similar fashion like test-miniconda 96 | export RUSTUP_HOME=$PWD/test-rustup 97 | export CARGO_HOME=$PWD/test-rustup 98 | 99 | curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path 100 | 101 | export PATH=$RUSTUP_HOME/bin:$PATH 102 | source $RUSTUP_HOME/env 103 | 104 | # build and test rust 105 | cargo build 106 | cargo test --all --all-features 107 | 108 | # verify that the crate can be published to crates.io 109 | cargo publish --dry-run 110 | } 111 | 112 | TEST_SUCCESS=no 113 | 114 | setup_tempdir "arrow-${VERSION}" 115 | echo "Working in sandbox ${ARROW_TMPDIR}" 116 | cd ${ARROW_TMPDIR} 117 | 118 | dist_name="apache-arrow-object-store-rs-${VERSION}" 119 | import_gpg_keys 120 | fetch_archive ${dist_name} 121 | tar xf ${dist_name}.tar.gz 122 | pushd ${dist_name} 123 | test_source_distribution 124 | popd 125 | 126 | TEST_SUCCESS=yes 127 | echo 'Release candidate looks good!' 128 | exit 0 129 | -------------------------------------------------------------------------------- /src/multipart.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Cloud Multipart Upload 19 | //! 20 | //! This crate provides an asynchronous interface for multipart file uploads to 21 | //! cloud storage services. It's designed to offer efficient, non-blocking operations, 22 | //! especially useful when dealing with large files or high-throughput systems. 23 | 24 | use async_trait::async_trait; 25 | 26 | use crate::path::Path; 27 | use crate::{MultipartId, PutPayload, PutResult, Result}; 28 | 29 | /// Represents a part of a file that has been successfully uploaded in a multipart upload process. 
30 | #[derive(Debug, Clone)] 31 | pub struct PartId { 32 | /// Id of this part 33 | pub content_id: String, 34 | } 35 | 36 | /// A low-level interface for interacting with multipart upload APIs 37 | /// 38 | /// Most use-cases should prefer [`ObjectStore::put_multipart_opts`] as this is supported by more 39 | /// backends, including [`LocalFileSystem`], and automatically handles uploading fixed 40 | /// size parts of sufficient size in parallel 41 | /// 42 | /// [`ObjectStore::put_multipart_opts`]: crate::ObjectStore::put_multipart_opts 43 | /// [`LocalFileSystem`]: crate::local::LocalFileSystem 44 | #[async_trait] 45 | pub trait MultipartStore: Send + Sync + 'static { 46 | /// Creates a new multipart upload, returning the [`MultipartId`] 47 | async fn create_multipart(&self, path: &Path) -> Result; 48 | 49 | /// Uploads a new part with index `part_idx` 50 | /// 51 | /// `part_idx` should be an integer in the range `0..N` where `N` is the number of 52 | /// parts in the upload. Parts may be uploaded concurrently and in any order. 53 | /// 54 | /// Most stores require that all parts excluding the last are at least 5 MiB, and some 55 | /// further require that all parts excluding the last be the same size, e.g. [R2]. 56 | /// [`WriteMultipart`] performs writes in fixed size blocks of 5 MiB, and clients wanting 57 | /// to maximise compatibility should look to do likewise. 58 | /// 59 | /// [R2]: https://developers.cloudflare.com/r2/objects/multipart-objects/#limitations 60 | /// [`WriteMultipart`]: crate::upload::WriteMultipart 61 | async fn put_part( 62 | &self, 63 | path: &Path, 64 | id: &MultipartId, 65 | part_idx: usize, 66 | data: PutPayload, 67 | ) -> Result; 68 | 69 | /// Completes a multipart upload 70 | /// 71 | /// The `i`'th value of `parts` must be a [`PartId`] returned by a call to [`Self::put_part`] 72 | /// with a `part_idx` of `i`, and the same `path` and `id` as provided to this method. 
Calling 73 | /// this method with out of sequence or repeated [`PartId`], or [`PartId`] returned for other 74 | /// values of `path` or `id`, will result in implementation-defined behaviour 75 | async fn complete_multipart( 76 | &self, 77 | path: &Path, 78 | id: &MultipartId, 79 | parts: Vec, 80 | ) -> Result; 81 | 82 | /// Aborts a multipart upload 83 | async fn abort_multipart(&self, path: &Path, id: &MultipartId) -> Result<()>; 84 | } 85 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | use std::fmt::{Debug, Display, Formatter}; 18 | use std::str::FromStr; 19 | use std::time::Duration; 20 | 21 | use humantime::{format_duration, parse_duration}; 22 | use reqwest::header::HeaderValue; 23 | 24 | use crate::{Error, Result}; 25 | 26 | /// Provides deferred parsing of a value 27 | /// 28 | /// This allows builders to defer fallibility to build 29 | #[derive(Debug, Clone)] 30 | pub(crate) enum ConfigValue { 31 | Parsed(T), 32 | Deferred(String), 33 | } 34 | 35 | impl Display for ConfigValue { 36 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 37 | match self { 38 | Self::Parsed(v) => write!(f, "{v}"), 39 | Self::Deferred(v) => write!(f, "{v}"), 40 | } 41 | } 42 | } 43 | 44 | impl From for ConfigValue { 45 | fn from(value: T) -> Self { 46 | Self::Parsed(value) 47 | } 48 | } 49 | 50 | impl ConfigValue { 51 | pub(crate) fn parse(&mut self, v: impl Into) { 52 | *self = Self::Deferred(v.into()) 53 | } 54 | 55 | pub(crate) fn get(&self) -> Result { 56 | match self { 57 | Self::Parsed(v) => Ok(v.clone()), 58 | Self::Deferred(v) => T::parse(v), 59 | } 60 | } 61 | } 62 | 63 | impl Default for ConfigValue { 64 | fn default() -> Self { 65 | Self::Parsed(T::default()) 66 | } 67 | } 68 | 69 | /// A value that can be stored in [`ConfigValue`] 70 | pub(crate) trait Parse: Sized { 71 | fn parse(v: &str) -> Result; 72 | } 73 | 74 | impl Parse for bool { 75 | fn parse(v: &str) -> Result { 76 | let lower = v.to_ascii_lowercase(); 77 | match lower.as_str() { 78 | "1" | "true" | "on" | "yes" | "y" => Ok(true), 79 | "0" | "false" | "off" | "no" | "n" => Ok(false), 80 | _ => Err(Error::Generic { 81 | store: "Config", 82 | source: format!("failed to parse \"{v}\" as boolean").into(), 83 | }), 84 | } 85 | } 86 | } 87 | 88 | impl Parse for Duration { 89 | fn parse(v: &str) -> Result { 90 | parse_duration(v).map_err(|_| Error::Generic { 91 | store: "Config", 92 | source: format!("failed to parse \"{v}\" as Duration").into(), 93 | }) 94 | } 95 | } 96 | 97 | impl 
Parse for usize { 98 | fn parse(v: &str) -> Result { 99 | Self::from_str(v).map_err(|_| Error::Generic { 100 | store: "Config", 101 | source: format!("failed to parse \"{v}\" as usize").into(), 102 | }) 103 | } 104 | } 105 | 106 | impl Parse for u32 { 107 | fn parse(v: &str) -> Result { 108 | Self::from_str(v).map_err(|_| Error::Generic { 109 | store: "Config", 110 | source: format!("failed to parse \"{v}\" as u32").into(), 111 | }) 112 | } 113 | } 114 | 115 | impl Parse for HeaderValue { 116 | fn parse(v: &str) -> Result { 117 | Self::from_str(v).map_err(|_| Error::Generic { 118 | store: "Config", 119 | source: format!("failed to parse \"{v}\" as HeaderValue").into(), 120 | }) 121 | } 122 | } 123 | 124 | pub(crate) fn fmt_duration(duration: &ConfigValue) -> String { 125 | match duration { 126 | ConfigValue::Parsed(v) => format_duration(*v).to_string(), 127 | ConfigValue::Deferred(v) => v.clone(), 128 | } 129 | } 130 | 131 | #[cfg(test)] 132 | mod tests { 133 | use super::*; 134 | use std::time::Duration; 135 | 136 | #[test] 137 | fn test_parse_duration() { 138 | let duration = Duration::from_secs(60); 139 | assert_eq!(Duration::parse("60 seconds").unwrap(), duration); 140 | assert_eq!(Duration::parse("60 s").unwrap(), duration); 141 | assert_eq!(Duration::parse("60s").unwrap(), duration) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "object_store" 20 | version = "0.13.0" 21 | edition = "2024" 22 | license = "MIT/Apache-2.0" 23 | readme = "README.md" 24 | description = "A generic object store interface for uniformly interacting with AWS S3, Google Cloud Storage, Azure Blob Storage and local files." 25 | keywords = ["object", "storage", "cloud"] 26 | repository = "https://github.com/apache/arrow-rs-object-store" 27 | rust-version = "1.85" 28 | include = ["src/**/*.rs", "README.md", "LICENSE.txt", "NOTICE.txt", "Cargo.toml"] 29 | 30 | [package.metadata.docs.rs] 31 | all-features = true 32 | 33 | [dependencies] # In alphabetical order 34 | async-trait = "0.1.53" 35 | bytes = "1.0" 36 | chrono = { version = "0.4.34", default-features = false, features = ["clock"] } 37 | futures = "0.3" 38 | http = "1.2.0" 39 | humantime = "2.1" 40 | itertools = "0.14.0" 41 | parking_lot = { version = "0.12" } 42 | percent-encoding = "2.1" 43 | thiserror = "2.0.2" 44 | tracing = { version = "0.1" } 45 | url = "2.2" 46 | walkdir = { version = "2", optional = true } 47 | 48 | # Cloud storage support 49 | base64 = { version = "0.22", default-features = false, features = ["std"], optional = true } 50 | form_urlencoded = { version = "1.2", optional = true } 51 | http-body-util = { version = "0.1.2", optional = true } 52 | httparse = { version = "1.8.0", default-features = false, features = ["std"], optional = true } 53 | hyper = { version = "1.2", default-features = false, optional = true } 54 | md-5 = { version = "0.10.6", default-features = 
false, optional = true } 55 | quick-xml = { version = "0.38.0", features = ["serialize", "overlapped-lists"], optional = true } 56 | rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true } 57 | reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "http2"], optional = true } 58 | ring = { version = "0.17", default-features = false, features = ["std"], optional = true } 59 | rustls-pki-types = { version = "1.9", default-features = false, features = ["std"], optional = true } 60 | serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } 61 | serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } 62 | serde_urlencoded = { version = "0.7", optional = true } 63 | tokio = { version = "1.29.0", features = ["sync", "macros", "rt", "time", "io-util"] } 64 | 65 | [target.'cfg(target_family="unix")'.dev-dependencies] 66 | nix = { version = "0.30.0", features = ["fs"] } 67 | 68 | [target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies] 69 | web-time = { version = "1.1.0" } 70 | wasm-bindgen-futures = "0.4.18" 71 | 72 | [features] 73 | default = ["fs"] 74 | cloud = ["serde", "serde_json", "quick-xml", "hyper", "reqwest", "reqwest/stream", "chrono/serde", "base64", "rand", "ring", "http-body-util", "form_urlencoded", "serde_urlencoded"] 75 | azure = ["cloud", "httparse"] 76 | fs = ["walkdir"] 77 | gcp = ["cloud", "rustls-pki-types"] 78 | aws = ["cloud", "md-5"] 79 | http = ["cloud"] 80 | tls-webpki-roots = ["reqwest?/rustls-tls-webpki-roots"] 81 | integration = ["rand"] 82 | 83 | [dev-dependencies] # In alphabetical order 84 | hyper = { version = "1.2", features = ["server"] } 85 | hyper-util = "0.1" 86 | rand = "0.9" 87 | tempfile = "3.1.0" 88 | regex = "1.11.1" 89 | # The "gzip" feature for reqwest is enabled for an integration test. 
90 | reqwest = { version = "0.12", default-features = false, features = ["gzip"] } 91 | 92 | [target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies] 93 | wasm-bindgen-test = "0.3.50" 94 | 95 | [dev-dependencies.getrandom_v03] 96 | package = "getrandom" 97 | version = "0.3" 98 | features = ["wasm_js"] 99 | 100 | [dev-dependencies.getrandom_v02] 101 | package = "getrandom" 102 | version = "0.2" 103 | features = ["js"] 104 | 105 | [[test]] 106 | name = "get_range_file" 107 | path = "tests/get_range_file.rs" 108 | required-features = ["fs"] -------------------------------------------------------------------------------- /dev/release/create-tarball.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | # This script creates a signed tarball in 22 | # dev/dist/apache-arrow-object-store-rs--.tar.gz and uploads it to 23 | # the "dev" area of the dist.apache.arrow repository and prepares an 24 | # email for sending to the dev@arrow.apache.org list for a formal 25 | # vote. 
26 | # 27 | # Note the tags are expected to be `object_sore_` 28 | # 29 | # See release/README.md for full release instructions 30 | # 31 | # Requirements: 32 | # 33 | # 1. gpg setup for signing and have uploaded your public 34 | # signature to https://pgp.mit.edu/ 35 | # 36 | # 2. Logged into the apache svn server with the appropriate 37 | # credentials 38 | # 39 | # 40 | # Based in part on 02-source.sh from apache/arrow 41 | # 42 | 43 | set -e 44 | 45 | SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 46 | SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" 47 | 48 | if [ "$#" -ne 2 ]; then 49 | echo "Usage: $0 " 50 | echo "ex. $0 0.4.0 1" 51 | exit 52 | fi 53 | 54 | object_store_version=$1 55 | rc=$2 56 | tag=v${object_store_version} 57 | 58 | release=apache-arrow-object-store-rs-${object_store_version} 59 | distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc} 60 | tarname=${release}.tar.gz 61 | tarball=${distdir}/${tarname} 62 | url="https://dist.apache.org/repos/dist/dev/arrow/${release}-rc${rc}" 63 | 64 | echo "Attempting to create ${tarball} from tag ${tag}" 65 | 66 | release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag}) 67 | 68 | if [ -z "$release_hash" ]; then 69 | echo "Cannot continue: unknown git tag: $tag" 70 | fi 71 | 72 | echo "Draft email for dev@arrow.apache.org mailing list" 73 | echo "" 74 | echo "---------------------------------------------------------" 75 | cat < containing the files in git at $release_hash 109 | # the files in the tarball are prefixed with {tag=} (e.g. 
0.4.0) 110 | mkdir -p ${distdir} 111 | (cd "${SOURCE_TOP_DIR}" && git archive ${release_hash} --prefix ${release}/ | gzip > ${tarball}) 112 | 113 | echo "Running rat license checker on ${tarball}" 114 | ${SOURCE_DIR}/../../dev/release/run-rat.sh ${tarball} 115 | 116 | echo "Signing tarball and creating checksums" 117 | gpg --armor --output ${tarball}.asc --detach-sig ${tarball} 118 | # create signing with relative path of tarball 119 | # so that they can be verified with a command such as 120 | # shasum --check apache-arrow-rs-4.1.0-rc2.tar.gz.sha512 121 | (cd ${distdir} && shasum -a 256 ${tarname}) > ${tarball}.sha256 122 | (cd ${distdir} && shasum -a 512 ${tarname}) > ${tarball}.sha512 123 | 124 | echo "Uploading to apache dist/dev to ${url}" 125 | svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow ${SOURCE_TOP_DIR}/dev/dist 126 | svn add ${distdir} 127 | svn ci -m "Apache Arrow Rust ${object_store_version=} ${rc}" ${distdir} 128 | -------------------------------------------------------------------------------- /src/client/list.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::Result; 19 | use crate::client::pagination::stream_paginated; 20 | use crate::list::{PaginatedListOptions, PaginatedListResult}; 21 | use crate::path::{DELIMITER, Path}; 22 | use crate::{ListResult, ObjectMeta}; 23 | use async_trait::async_trait; 24 | use futures::stream::BoxStream; 25 | use futures::{StreamExt, TryStreamExt}; 26 | use std::borrow::Cow; 27 | use std::collections::BTreeSet; 28 | 29 | /// A client that can perform paginated list requests 30 | #[async_trait] 31 | pub(crate) trait ListClient: Send + Sync + 'static { 32 | async fn list_request( 33 | &self, 34 | prefix: Option<&str>, 35 | options: PaginatedListOptions, 36 | ) -> Result; 37 | } 38 | 39 | /// Extension trait for [`ListClient`] that adds common listing functionality 40 | #[async_trait] 41 | pub(crate) trait ListClientExt { 42 | fn list_paginated( 43 | &self, 44 | prefix: Option<&Path>, 45 | delimiter: bool, 46 | offset: Option<&Path>, 47 | ) -> BoxStream<'static, Result>; 48 | 49 | fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result>; 50 | 51 | #[allow(unused)] 52 | fn list_with_offset( 53 | &self, 54 | prefix: Option<&Path>, 55 | offset: &Path, 56 | ) -> BoxStream<'static, Result>; 57 | 58 | async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result; 59 | } 60 | 61 | #[async_trait] 62 | impl ListClientExt for T { 63 | fn list_paginated( 64 | &self, 65 | prefix: Option<&Path>, 66 | delimiter: bool, 67 | offset: Option<&Path>, 68 | ) -> BoxStream<'static, Result> { 69 | let offset = offset.map(|x| x.to_string()); 70 | let prefix = prefix 71 | .filter(|x| !x.as_ref().is_empty()) 72 | .map(|p| format!("{}{}", p.as_ref(), DELIMITER)); 73 | stream_paginated( 74 | self.clone(), 75 | (prefix, offset), 76 | move |client, (prefix, offset), page_token| async move { 77 | let r = client 78 | .list_request( 79 | prefix.as_deref(), 80 | 
PaginatedListOptions { 81 | offset: offset.clone(), 82 | delimiter: delimiter.then_some(Cow::Borrowed(DELIMITER)), 83 | page_token, 84 | ..Default::default() 85 | }, 86 | ) 87 | .await?; 88 | Ok((r.result, (prefix, offset), r.page_token)) 89 | }, 90 | ) 91 | .boxed() 92 | } 93 | 94 | fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { 95 | self.list_paginated(prefix, false, None) 96 | .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok))) 97 | .try_flatten() 98 | .boxed() 99 | } 100 | 101 | fn list_with_offset( 102 | &self, 103 | prefix: Option<&Path>, 104 | offset: &Path, 105 | ) -> BoxStream<'static, Result> { 106 | self.list_paginated(prefix, false, Some(offset)) 107 | .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok))) 108 | .try_flatten() 109 | .boxed() 110 | } 111 | 112 | async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { 113 | let mut stream = self.list_paginated(prefix, true, None); 114 | 115 | let mut common_prefixes = BTreeSet::new(); 116 | let mut objects = Vec::new(); 117 | 118 | while let Some(result) = stream.next().await { 119 | let response = result?; 120 | common_prefixes.extend(response.common_prefixes.into_iter()); 121 | objects.extend(response.objects.into_iter()); 122 | } 123 | 124 | Ok(ListResult { 125 | common_prefixes: common_prefixes.into_iter().collect(), 126 | objects, 127 | }) 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/client/mock_server.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::client::{HttpResponse, HttpResponseBody}; 19 | use futures::FutureExt; 20 | use futures::future::BoxFuture; 21 | use hyper::body::Incoming; 22 | use hyper::server::conn::http1; 23 | use hyper::service::service_fn; 24 | use hyper::{Request, Response}; 25 | use hyper_util::rt::TokioIo; 26 | use parking_lot::Mutex; 27 | use std::collections::VecDeque; 28 | use std::convert::Infallible; 29 | use std::future::Future; 30 | use std::net::SocketAddr; 31 | use std::sync::Arc; 32 | use tokio::net::TcpListener; 33 | use tokio::sync::oneshot; 34 | use tokio::task::{JoinHandle, JoinSet}; 35 | 36 | pub(crate) type ResponseFn = 37 | Box) -> BoxFuture<'static, HttpResponse> + Send>; 38 | 39 | /// A mock server 40 | pub(crate) struct MockServer { 41 | responses: Arc>>, 42 | shutdown: oneshot::Sender<()>, 43 | handle: JoinHandle<()>, 44 | url: String, 45 | } 46 | 47 | impl MockServer { 48 | pub(crate) async fn new() -> Self { 49 | let responses: Arc>> = 50 | Arc::new(Mutex::new(VecDeque::with_capacity(10))); 51 | 52 | let addr = SocketAddr::from(([127, 0, 0, 1], 0)); 53 | let listener = TcpListener::bind(addr).await.unwrap(); 54 | 55 | let (shutdown, mut rx) = oneshot::channel::<()>(); 56 | 57 | let url = format!("http://{}", listener.local_addr().unwrap()); 58 | 59 | let r = Arc::clone(&responses); 60 | let handle = tokio::spawn(async move 
{ 61 | let mut set = JoinSet::new(); 62 | 63 | loop { 64 | let (stream, _) = tokio::select! { 65 | conn = listener.accept() => conn.unwrap(), 66 | _ = &mut rx => break, 67 | }; 68 | 69 | let r = Arc::clone(&r); 70 | set.spawn(async move { 71 | let _ = http1::Builder::new() 72 | .serve_connection( 73 | TokioIo::new(stream), 74 | service_fn(move |req| { 75 | let r = Arc::clone(&r); 76 | let next = r.lock().pop_front(); 77 | async move { 78 | Ok::<_, Infallible>(match next { 79 | Some(r) => r(req).await, 80 | None => HttpResponse::new("Hello World".to_string().into()), 81 | }) 82 | } 83 | }), 84 | ) 85 | .await; 86 | }); 87 | } 88 | 89 | set.abort_all(); 90 | }); 91 | 92 | Self { 93 | responses, 94 | shutdown, 95 | handle, 96 | url, 97 | } 98 | } 99 | 100 | /// The url of the mock server 101 | pub(crate) fn url(&self) -> &str { 102 | &self.url 103 | } 104 | 105 | /// Add a response 106 | pub(crate) fn push>(&self, response: Response) { 107 | let resp = response.map(Into::into); 108 | self.push_fn(|_| resp) 109 | } 110 | 111 | /// Add a response function 112 | pub(crate) fn push_fn(&self, f: F) 113 | where 114 | F: FnOnce(Request) -> Response + Send + 'static, 115 | B: Into, 116 | { 117 | let f = Box::new(|req| async move { f(req).map(Into::into) }.boxed()); 118 | self.responses.lock().push_back(f) 119 | } 120 | 121 | pub(crate) fn push_async_fn(&self, f: F) 122 | where 123 | F: FnOnce(Request) -> Fut + Send + 'static, 124 | Fut: Future> + Send + 'static, 125 | { 126 | let f = Box::new(|r| f(r).map(|b| b.map(Into::into)).boxed()); 127 | self.responses.lock().push_back(f) 128 | } 129 | 130 | /// Shutdown the mock server 131 | pub(crate) async fn shutdown(self) { 132 | let _ = self.shutdown.send(()); 133 | self.handle.await.unwrap() 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/client/s3.rs: -------------------------------------------------------------------------------- 1 | // or more contributor license 
agreements. See the NOTICE file 2 | // distributed with this work for additional information 3 | // regarding copyright ownership. The ASF licenses this file 4 | // to you under the Apache License, Version 2.0 (the 5 | // "License"); you may not use this file except in compliance 6 | // with the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, 11 | // software distributed under the License is distributed on an 12 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | // KIND, either express or implied. See the License for the 14 | // specific language governing permissions and limitations 15 | // under the License. 16 | 17 | //! The list and multipart API used by both GCS and S3 18 | 19 | use crate::multipart::PartId; 20 | use crate::path::Path; 21 | use crate::{ListResult, ObjectMeta, Result}; 22 | use chrono::{DateTime, Utc}; 23 | use serde::{Deserialize, Serialize}; 24 | 25 | #[derive(Debug, Deserialize)] 26 | #[serde(rename_all = "PascalCase")] 27 | pub struct ListResponse { 28 | #[serde(default)] 29 | pub contents: Vec, 30 | #[serde(default)] 31 | pub common_prefixes: Vec, 32 | #[serde(default)] 33 | pub next_continuation_token: Option, 34 | } 35 | 36 | impl TryFrom for ListResult { 37 | type Error = crate::Error; 38 | 39 | fn try_from(value: ListResponse) -> Result { 40 | let common_prefixes = value 41 | .common_prefixes 42 | .into_iter() 43 | .map(|x| Ok(Path::parse(x.prefix)?)) 44 | .collect::>()?; 45 | 46 | let objects = value 47 | .contents 48 | .into_iter() 49 | .map(TryFrom::try_from) 50 | .collect::>()?; 51 | 52 | Ok(Self { 53 | common_prefixes, 54 | objects, 55 | }) 56 | } 57 | } 58 | 59 | #[derive(Debug, Deserialize)] 60 | #[serde(rename_all = "PascalCase")] 61 | pub struct ListPrefix { 62 | pub prefix: String, 63 | } 64 | 65 | #[derive(Debug, Deserialize)] 66 | #[serde(rename_all = "PascalCase")] 67 | pub 
struct ListContents { 68 | pub key: String, 69 | pub size: u64, 70 | pub last_modified: DateTime, 71 | #[serde(rename = "ETag")] 72 | pub e_tag: Option, 73 | } 74 | 75 | impl TryFrom for ObjectMeta { 76 | type Error = crate::Error; 77 | 78 | fn try_from(value: ListContents) -> Result { 79 | Ok(Self { 80 | location: Path::parse(value.key)?, 81 | last_modified: value.last_modified, 82 | size: value.size, 83 | e_tag: value.e_tag, 84 | version: None, 85 | }) 86 | } 87 | } 88 | 89 | #[derive(Debug, Deserialize)] 90 | #[serde(rename_all = "PascalCase")] 91 | pub(crate) struct InitiateMultipartUploadResult { 92 | pub upload_id: String, 93 | } 94 | 95 | #[cfg(feature = "aws")] 96 | #[derive(Debug, Deserialize)] 97 | #[serde(rename_all = "PascalCase")] 98 | pub(crate) struct CopyPartResult { 99 | #[serde(rename = "ETag")] 100 | pub e_tag: String, 101 | #[serde(default, rename = "ChecksumSHA256")] 102 | pub checksum_sha256: Option, 103 | } 104 | 105 | #[derive(Debug, Serialize)] 106 | #[serde(rename_all = "PascalCase")] 107 | pub(crate) struct CompleteMultipartUpload { 108 | pub part: Vec, 109 | } 110 | 111 | #[derive(Serialize, Deserialize)] 112 | pub(crate) struct PartMetadata { 113 | pub e_tag: String, 114 | #[serde(skip_serializing_if = "Option::is_none")] 115 | pub checksum_sha256: Option, 116 | } 117 | 118 | impl From> for CompleteMultipartUpload { 119 | fn from(value: Vec) -> Self { 120 | let part = value 121 | .into_iter() 122 | .enumerate() 123 | .map(|(part_idx, part)| { 124 | let md = match quick_xml::de::from_str::(&part.content_id) { 125 | Ok(md) => md, 126 | // fallback to old way 127 | Err(_) => PartMetadata { 128 | e_tag: part.content_id.clone(), 129 | checksum_sha256: None, 130 | }, 131 | }; 132 | MultipartPart { 133 | e_tag: md.e_tag, 134 | part_number: part_idx + 1, 135 | checksum_sha256: md.checksum_sha256, 136 | } 137 | }) 138 | .collect(); 139 | Self { part } 140 | } 141 | } 142 | 143 | #[derive(Debug, Serialize)] 144 | pub(crate) struct MultipartPart { 
145 | #[serde(rename = "ETag")] 146 | pub e_tag: String, 147 | #[serde(rename = "PartNumber")] 148 | pub part_number: usize, 149 | #[serde(rename = "ChecksumSHA256")] 150 | #[serde(skip_serializing_if = "Option::is_none")] 151 | pub checksum_sha256: Option, 152 | } 153 | 154 | #[derive(Debug, Deserialize)] 155 | #[serde(rename_all = "PascalCase")] 156 | pub(crate) struct CompleteMultipartUploadResult { 157 | #[serde(rename = "ETag")] 158 | pub e_tag: String, 159 | } 160 | -------------------------------------------------------------------------------- /tests/get_range_file.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Tests the default implementation of get_range handles GetResult::File correctly (#4350) 19 | 20 | use async_trait::async_trait; 21 | use bytes::Bytes; 22 | use futures::stream::BoxStream; 23 | use object_store::local::LocalFileSystem; 24 | use object_store::path::Path; 25 | use object_store::*; 26 | use std::fmt::Formatter; 27 | use tempfile::tempdir; 28 | 29 | #[derive(Debug)] 30 | struct MyStore(LocalFileSystem); 31 | 32 | impl std::fmt::Display for MyStore { 33 | fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { 34 | todo!() 35 | } 36 | } 37 | 38 | #[async_trait] 39 | impl ObjectStore for MyStore { 40 | async fn put_opts( 41 | &self, 42 | location: &Path, 43 | payload: PutPayload, 44 | opts: PutOptions, 45 | ) -> Result { 46 | self.0.put_opts(location, payload, opts).await 47 | } 48 | 49 | async fn put_multipart_opts( 50 | &self, 51 | _location: &Path, 52 | _opts: PutMultipartOptions, 53 | ) -> Result> { 54 | todo!() 55 | } 56 | 57 | async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { 58 | self.0.get_opts(location, options).await 59 | } 60 | 61 | fn delete_stream( 62 | &self, 63 | _: BoxStream<'static, Result>, 64 | ) -> BoxStream<'static, Result> { 65 | todo!() 66 | } 67 | 68 | fn list(&self, _: Option<&Path>) -> BoxStream<'static, Result> { 69 | todo!() 70 | } 71 | 72 | async fn list_with_delimiter(&self, _: Option<&Path>) -> Result { 73 | todo!() 74 | } 75 | 76 | async fn copy_opts(&self, _: &Path, _: &Path, _: CopyOptions) -> Result<()> { 77 | todo!() 78 | } 79 | } 80 | 81 | #[tokio::test] 82 | async fn test_get_range() { 83 | let tmp = tempdir().unwrap(); 84 | let store = MyStore(LocalFileSystem::new_with_prefix(tmp.path()).unwrap()); 85 | let path = Path::from("foo"); 86 | 87 | let expected = Bytes::from_static(b"hello world"); 88 | store.put(&path, expected.clone().into()).await.unwrap(); 89 | let fetched = store.get(&path).await.unwrap().bytes().await.unwrap(); 90 | assert_eq!(expected, fetched); 91 | 92 | for range in 
[0..10, 3..5, 0..expected.len() as u64] { 93 | let data = store.get_range(&path, range.clone()).await.unwrap(); 94 | assert_eq!( 95 | &data[..], 96 | &expected[range.start as usize..range.end as usize] 97 | ) 98 | } 99 | 100 | let over_range = 0..(expected.len() as u64 * 2); 101 | let data = store.get_range(&path, over_range.clone()).await.unwrap(); 102 | assert_eq!(&data[..], expected) 103 | } 104 | 105 | /// Test that, when a requesting a range which overhangs the end of the resource, 106 | /// the resulting [GetResult::range] reports the returned range, 107 | /// not the requested. 108 | #[tokio::test] 109 | async fn test_get_opts_over_range() { 110 | let tmp = tempdir().unwrap(); 111 | let store = MyStore(LocalFileSystem::new_with_prefix(tmp.path()).unwrap()); 112 | let path = Path::from("foo"); 113 | 114 | let expected = Bytes::from_static(b"hello world"); 115 | store.put(&path, expected.clone().into()).await.unwrap(); 116 | 117 | let opts = 118 | GetOptions::new().with_range(Some(GetRange::Bounded(0..(expected.len() as u64 * 2)))); 119 | let res = store.get_opts(&path, opts).await.unwrap(); 120 | assert_eq!(res.range, 0..expected.len() as u64); 121 | assert_eq!(res.bytes().await.unwrap(), expected); 122 | } 123 | 124 | #[tokio::test] 125 | async fn test_get_range_opts_with_etag() { 126 | let tmp = tempdir().unwrap(); 127 | let store = MyStore(LocalFileSystem::new_with_prefix(tmp.path()).unwrap()); 128 | let path = Path::from("foo"); 129 | 130 | let expected = Bytes::from_static(b"hello world"); 131 | store.put(&path, expected.clone().into()).await.unwrap(); 132 | 133 | // pull the file to get the etag 134 | let file = store.get(&path).await.unwrap(); 135 | let etag = file.meta.e_tag.clone().unwrap(); 136 | 137 | let opts = GetOptions::new() 138 | .with_if_match(Some(etag)) 139 | .with_range(Some(0..(expected.len() as u64 * 2))); 140 | let res = store.get_opts(&path, opts).await.unwrap(); 141 | assert_eq!(res.bytes().await.unwrap(), expected); 142 | 143 | // 
pulling a file with an invalid etag should fail 144 | let opts = GetOptions::new() 145 | .with_if_match(Some("invalid-etag")) 146 | .with_range(Some(0..(expected.len() as u64 * 2))); 147 | let err = store.get_opts(&path, opts).await; 148 | assert!(err.is_err()); 149 | } 150 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Rust Object Store 21 | 22 | A focused, easy to use, idiomatic, high performance, `async` object 23 | store library for interacting with object stores. 24 | 25 | Using this crate, the same binary and code can easily run in multiple 26 | clouds and local test environments, via a simple runtime configuration 27 | change. Supported object stores include: 28 | 29 | * [AWS S3](https://aws.amazon.com/s3/) 30 | * [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) 31 | * [Google Cloud Storage](https://cloud.google.com/storage) 32 | * Local files 33 | * Memory 34 | * [HTTP/WebDAV Storage](https://datatracker.ietf.org/doc/html/rfc2518) 35 | * Custom implementations 36 | 37 | Originally developed by [InfluxData](https://www.influxdata.com/) and later donated to [Apache Arrow](https://arrow.apache.org/). 38 | 39 | See [docs.rs](https://docs.rs/object_store) for usage instructions 40 | 41 | See [CONTRIBUTING.md] to learn how to contribute to this project. 42 | 43 | [CONTRIBUTING.md]: https://github.com/apache/arrow-rs-object-store/blob/main/CONTRIBUTING.md 44 | 45 | ## Support for `wasm32-unknown-unknown` target 46 | 47 | It's possible to build `object_store` for the `wasm32-unknown-unknown` target, however the cloud storage features `aws`, `azure`, `gcp`, and `http` are not supported. 
48 | 
49 | ``` 
50 | cargo build -p object_store --target wasm32-unknown-unknown 
51 | ``` 
52 | 
53 | ## Related Apache Crates 
54 | 
55 | Here are several related crates in different repositories from other Apache projects. 
56 | 
57 | | Crate | Description | Documentation | 
58 | | ------------------------ | ------------------------------------------- | --------------------------------------- | 
59 | | [`object_store_opendal`] | Use [`opendal`] as [`object_store`] backend | [(README)][object_store_opendal-readme] | 
60 | 
61 | [`object_store_opendal`]: https://crates.io/crates/object_store_opendal 
62 | [`opendal`]: https://crates.io/crates/opendal 
63 | [object_store_opendal-readme]: https://github.com/apache/opendal/blob/main/integrations/object_store/README.md 
64 | 
65 | ## Community Extensions 
66 | 
67 | The following community maintained crates provide additional functionality for `object_store` and are NOT governed by the Apache Software Foundation. We list them below in the hope they may be useful, but they are not official Apache projects or endorsed by the Apache Arrow project. 
68 | 69 | | Crate | Description | Documentation | 70 | | ---------------------------- | -------------------------------------------------------------------------------- | ------------------------------------------- | 71 | | [`hdfs_native_object_store`] | Use HDFS as [`object_store`] backend | [(README)][hdfs_native_object_store-readme] | 72 | | [`ic_object_store`] | Use [ICP] blockchain as [`object_store`] backend | [(README)][ic_object_store-readme] | 73 | | [`anda_object_store`] | Extends the [`object_store`] with metadata management and AES-256-GCM encryption | [(README)][anda_object_store-readme] | 74 | 75 | [`hdfs_native_object_store`]: https://crates.io/crates/hdfs_native_object_store 76 | [hdfs_native_object_store-readme]: https://github.com/datafusion-contrib/hdfs-native-object-store 77 | [`ic_object_store`]: https://crates.io/crates/ic_object_store 78 | [ic_object_store-readme]: https://github.com/ldclabs/ic-oss/tree/main/src/ic_object_store 79 | [`anda_object_store`]: https://crates.io/crates/anda_object_store 80 | [anda_object_store-readme]: https://github.com/ldclabs/anda-db/blob/main/rs/anda_object_store 81 | [ICP]: https://www.internetcomputer.org/ 82 | 83 | ## Release Schedule 84 | 85 | The [`object_store`] crate follows [Semantic Versioning]. We aim to release new 86 | versions approximately every 2 months. 87 | 88 | Please see [the release tracker] for an up to date release schedule and to track 89 | the progress of upcoming releases. 
90 | 91 | [`object_store`]: https://crates.io/crates/object_store 92 | [semantic versioning]: https://semver.org/ 93 | [the release tracker]: https://github.com/apache/arrow-rs-object-store/issues/392 94 | 95 | Planned Release Schedule 96 | 97 | | Approximate Date | Version | Notes | Ticket | 98 | |------------------|----------|--------------------------------|:-------------------------------------------------------------------| 99 | | Dec 2025 | `0.13.0` | Major, breaking API changes | [#367](https://github.com/apache/arrow-rs-object-store/issues/367) | 100 | | Dec 2025 | `0.12.5` | Minor, NO breaking API changes | [#582](https://github.com/apache/arrow-rs-object-store/issues/582) | 101 | | Feb 2026 | `0.13.1` | Minor, NO breaking API changes | [#393](https://github.com/apache/arrow-rs-object-store/issues/393) | 102 | -------------------------------------------------------------------------------- /src/client/http/spawn.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use crate::client::{ 19 | HttpError, HttpErrorKind, HttpRequest, HttpResponse, HttpResponseBody, HttpService, 20 | }; 21 | use async_trait::async_trait; 22 | use bytes::Bytes; 23 | use http::Response; 24 | use http_body_util::BodyExt; 25 | use hyper::body::{Body, Frame}; 26 | use std::pin::Pin; 27 | use std::task::{Context, Poll}; 28 | use thiserror::Error; 29 | use tokio::runtime::Handle; 30 | use tokio::task::JoinHandle; 31 | 32 | /// Spawn error 33 | #[derive(Debug, Error)] 34 | #[error("SpawnError")] 35 | struct SpawnError {} 36 | 37 | impl From for HttpError { 38 | fn from(value: SpawnError) -> Self { 39 | Self::new(HttpErrorKind::Interrupted, value) 40 | } 41 | } 42 | 43 | /// Wraps a provided [`HttpService`] and runs it on a separate tokio runtime 44 | /// 45 | /// See example on [`SpawnedReqwestConnector`] 46 | /// 47 | /// [`SpawnedReqwestConnector`]: crate::client::http::SpawnedReqwestConnector 48 | #[derive(Debug)] 49 | pub struct SpawnService { 50 | inner: T, 51 | runtime: Handle, 52 | } 53 | 54 | impl SpawnService { 55 | /// Creates a new [`SpawnService`] from the provided 56 | pub fn new(inner: T, runtime: Handle) -> Self { 57 | Self { inner, runtime } 58 | } 59 | } 60 | 61 | #[async_trait] 62 | impl HttpService for SpawnService { 63 | async fn call(&self, req: HttpRequest) -> Result { 64 | let inner = self.inner.clone(); 65 | let (send, recv) = tokio::sync::oneshot::channel(); 66 | 67 | // We use an unbounded channel to prevent backpressure across the runtime boundary 68 | // which could in turn starve the underlying IO operations 69 | let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); 70 | 71 | let handle = SpawnHandle(self.runtime.spawn(async move { 72 | let r = match HttpService::call(&inner, req).await { 73 | Ok(resp) => resp, 74 | Err(e) => { 75 | let _ = send.send(Err(e)); 76 | return; 77 | } 78 | }; 79 | 80 | let (parts, mut body) = r.into_parts(); 81 | if send.send(Ok(parts)).is_err() { 82 | return; 83 | } 84 | 85 | 
while let Some(x) = body.frame().await { 86 | if sender.send(x).is_err() { 87 | return; 88 | } 89 | } 90 | })); 91 | 92 | let parts = recv.await.map_err(|_| SpawnError {})??; 93 | 94 | Ok(Response::from_parts( 95 | parts, 96 | HttpResponseBody::new(SpawnBody { 97 | stream: receiver, 98 | _worker: handle, 99 | }), 100 | )) 101 | } 102 | } 103 | 104 | /// A wrapper around a [`JoinHandle`] that aborts on drop 105 | struct SpawnHandle(JoinHandle<()>); 106 | impl Drop for SpawnHandle { 107 | fn drop(&mut self) { 108 | self.0.abort(); 109 | } 110 | } 111 | 112 | type StreamItem = Result, HttpError>; 113 | 114 | struct SpawnBody { 115 | stream: tokio::sync::mpsc::UnboundedReceiver, 116 | _worker: SpawnHandle, 117 | } 118 | 119 | impl Body for SpawnBody { 120 | type Data = Bytes; 121 | type Error = HttpError; 122 | 123 | fn poll_frame(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { 124 | self.stream.poll_recv(cx) 125 | } 126 | } 127 | 128 | #[cfg(not(target_arch = "wasm32"))] 129 | #[cfg(test)] 130 | mod tests { 131 | use super::*; 132 | use crate::RetryConfig; 133 | use crate::client::HttpClient; 134 | use crate::client::mock_server::MockServer; 135 | use crate::client::retry::RetryExt; 136 | 137 | async fn test_client(client: HttpClient) { 138 | let (send, recv) = tokio::sync::oneshot::channel(); 139 | 140 | let mock = MockServer::new().await; 141 | mock.push(Response::new("BANANAS".to_string())); 142 | 143 | let url = mock.url().to_string(); 144 | let thread = std::thread::spawn(|| { 145 | futures::executor::block_on(async move { 146 | let retry = RetryConfig::default(); 147 | let ret = client.get(url).send_retry(&retry).await.unwrap(); 148 | let payload = ret.into_body().bytes().await.unwrap(); 149 | assert_eq!(payload.as_ref(), b"BANANAS"); 150 | let _ = send.send(()); 151 | }) 152 | }); 153 | recv.await.unwrap(); 154 | thread.join().unwrap(); 155 | } 156 | 157 | #[tokio::test] 158 | async fn test_spawn() { 159 | let client = 
HttpClient::new(SpawnService::new(reqwest::Client::new(), Handle::current())); 160 | test_client(client).await; 161 | } 162 | 163 | #[tokio::test] 164 | #[should_panic] 165 | async fn test_no_spawn() { 166 | let client = HttpClient::new(reqwest::Client::new()); 167 | test_client(client).await; 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/client/header.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Logic for extracting ObjectMeta from headers used by AWS, GCP and Azure 19 | 20 | use crate::ObjectMeta; 21 | use crate::path::Path; 22 | use chrono::{DateTime, TimeZone, Utc}; 23 | use http::HeaderMap; 24 | use http::header::{CONTENT_LENGTH, ETAG, LAST_MODIFIED}; 25 | 26 | #[derive(Debug, Copy, Clone)] 27 | /// Configuration for header extraction 28 | pub(crate) struct HeaderConfig { 29 | /// Whether to require an ETag header when extracting [`ObjectMeta`] from headers. 
30 | /// 31 | /// Defaults to `true` 32 | pub etag_required: bool, 33 | 34 | /// Whether to require a Last-Modified header when extracting [`ObjectMeta`] from headers. 35 | /// 36 | /// Defaults to `true` 37 | pub last_modified_required: bool, 38 | 39 | /// The version header name if any 40 | pub version_header: Option<&'static str>, 41 | 42 | /// The user defined metadata prefix if any 43 | pub user_defined_metadata_prefix: Option<&'static str>, 44 | } 45 | 46 | #[derive(Debug, thiserror::Error)] 47 | pub(crate) enum Error { 48 | #[error("ETag Header missing from response")] 49 | MissingEtag, 50 | 51 | #[error("Received header containing non-ASCII data")] 52 | BadHeader { source: reqwest::header::ToStrError }, 53 | 54 | #[error("Last-Modified Header missing from response")] 55 | MissingLastModified, 56 | 57 | #[error("Content-Length Header missing from response")] 58 | MissingContentLength, 59 | 60 | #[error("Invalid last modified '{}': {}", last_modified, source)] 61 | InvalidLastModified { 62 | last_modified: String, 63 | source: chrono::ParseError, 64 | }, 65 | 66 | #[error("Invalid content length '{}': {}", content_length, source)] 67 | InvalidContentLength { 68 | content_length: String, 69 | source: std::num::ParseIntError, 70 | }, 71 | } 72 | 73 | /// Extracts a PutResult from the provided [`HeaderMap`] 74 | #[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] 75 | pub(crate) fn get_put_result( 76 | headers: &HeaderMap, 77 | version: &str, 78 | ) -> Result { 79 | let e_tag = Some(get_etag(headers)?); 80 | let version = get_version(headers, version)?; 81 | Ok(crate::PutResult { e_tag, version }) 82 | } 83 | 84 | /// Extracts a optional version from the provided [`HeaderMap`] 85 | #[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] 86 | pub(crate) fn get_version(headers: &HeaderMap, version: &str) -> Result, Error> { 87 | Ok(match headers.get(version) { 88 | Some(x) => Some( 89 | x.to_str() 90 | .map_err(|source| Error::BadHeader { 
source })? 91 | .to_string(), 92 | ), 93 | None => None, 94 | }) 95 | } 96 | 97 | /// Extracts an etag from the provided [`HeaderMap`] 98 | pub(crate) fn get_etag(headers: &HeaderMap) -> Result { 99 | let e_tag = headers.get(ETAG).ok_or(Error::MissingEtag)?; 100 | Ok(e_tag 101 | .to_str() 102 | .map_err(|source| Error::BadHeader { source })? 103 | .to_string()) 104 | } 105 | 106 | /// Extracts [`ObjectMeta`] from the provided [`HeaderMap`] 107 | pub(crate) fn header_meta( 108 | location: &Path, 109 | headers: &HeaderMap, 110 | cfg: HeaderConfig, 111 | ) -> Result { 112 | let last_modified = match headers.get(LAST_MODIFIED) { 113 | Some(last_modified) => { 114 | let last_modified = last_modified 115 | .to_str() 116 | .map_err(|source| Error::BadHeader { source })?; 117 | 118 | DateTime::parse_from_rfc2822(last_modified) 119 | .map_err(|source| Error::InvalidLastModified { 120 | last_modified: last_modified.into(), 121 | source, 122 | })? 123 | .with_timezone(&Utc) 124 | } 125 | None if cfg.last_modified_required => return Err(Error::MissingLastModified), 126 | None => Utc.timestamp_nanos(0), 127 | }; 128 | 129 | let e_tag = match get_etag(headers) { 130 | Ok(e_tag) => Some(e_tag), 131 | Err(Error::MissingEtag) if !cfg.etag_required => None, 132 | Err(e) => return Err(e), 133 | }; 134 | 135 | let content_length = headers 136 | .get(CONTENT_LENGTH) 137 | .ok_or(Error::MissingContentLength)?; 138 | 139 | let content_length = content_length 140 | .to_str() 141 | .map_err(|source| Error::BadHeader { source })?; 142 | 143 | let size = content_length 144 | .parse() 145 | .map_err(|source| Error::InvalidContentLength { 146 | content_length: content_length.into(), 147 | source, 148 | })?; 149 | 150 | let version = match cfg.version_header.and_then(|h| headers.get(h)) { 151 | Some(v) => Some( 152 | v.to_str() 153 | .map_err(|source| Error::BadHeader { source })? 
154 | .to_string(), 155 | ), 156 | None => None, 157 | }; 158 | 159 | Ok(ObjectMeta { 160 | location: location.clone(), 161 | last_modified, 162 | version, 163 | size, 164 | e_tag, 165 | }) 166 | } 167 | -------------------------------------------------------------------------------- /src/client/token.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::future::Future; 19 | use std::time::{Duration, Instant}; 20 | use tokio::sync::Mutex; 21 | 22 | /// A temporary authentication token with an associated expiry 23 | #[derive(Debug, Clone)] 24 | pub(crate) struct TemporaryToken { 25 | /// The temporary credential 26 | pub token: T, 27 | /// The instant at which this credential is no longer valid 28 | /// None means the credential does not expire 29 | pub expiry: Option, 30 | } 31 | 32 | /// Provides [`TokenCache::get_or_insert_with`] which can be used to cache a 33 | /// [`TemporaryToken`] based on its expiry 34 | #[derive(Debug)] 35 | pub(crate) struct TokenCache { 36 | cache: Mutex, Instant)>>, 37 | min_ttl: Duration, 38 | fetch_backoff: Duration, 39 | } 40 | 41 | impl Default for TokenCache { 42 | fn default() -> Self { 43 | Self { 44 | cache: Default::default(), 45 | min_ttl: Duration::from_secs(300), 46 | // How long to wait before re-attempting a token fetch after receiving one that 47 | // is still within the min-ttl 48 | fetch_backoff: Duration::from_millis(100), 49 | } 50 | } 51 | } 52 | 53 | impl TokenCache { 54 | /// Override the minimum remaining TTL for a cached token to be used 55 | #[cfg(any(feature = "aws", feature = "gcp"))] 56 | pub(crate) fn with_min_ttl(self, min_ttl: Duration) -> Self { 57 | Self { min_ttl, ..self } 58 | } 59 | 60 | pub(crate) async fn get_or_insert_with(&self, f: F) -> Result 61 | where 62 | F: FnOnce() -> Fut + Send, 63 | Fut: Future, E>> + Send, 64 | { 65 | let now = Instant::now(); 66 | let mut locked = self.cache.lock().await; 67 | 68 | if let Some((cached, fetched_at)) = locked.as_ref() { 69 | match cached.expiry { 70 | Some(ttl) => { 71 | if ttl.checked_duration_since(now).unwrap_or_default() > self.min_ttl || 72 | // if we've recently attempted to fetch this token and it's not actually 73 | // expired, we'll wait to re-fetch it and return the cached one 74 | (fetched_at.elapsed() < self.fetch_backoff && ttl.checked_duration_since(now).is_some()) 75 | { 
76 | return Ok(cached.token.clone()); 77 | } 78 | } 79 | None => return Ok(cached.token.clone()), 80 | } 81 | } 82 | 83 | let cached = f().await?; 84 | let token = cached.token.clone(); 85 | *locked = Some((cached, Instant::now())); 86 | 87 | Ok(token) 88 | } 89 | } 90 | 91 | #[cfg(test)] 92 | mod test { 93 | use crate::client::token::{TemporaryToken, TokenCache}; 94 | use std::sync::atomic::{AtomicU32, Ordering}; 95 | use std::time::{Duration, Instant}; 96 | 97 | // Helper function to create a token with a specific expiry duration from now 98 | fn create_token(expiry_duration: Option) -> TemporaryToken { 99 | TemporaryToken { 100 | token: "test_token".to_string(), 101 | expiry: expiry_duration.map(|d| Instant::now() + d), 102 | } 103 | } 104 | 105 | #[tokio::test] 106 | async fn test_expired_token_is_refreshed() { 107 | let cache = TokenCache::default(); 108 | static COUNTER: AtomicU32 = AtomicU32::new(0); 109 | 110 | async fn get_token() -> Result, String> { 111 | COUNTER.fetch_add(1, Ordering::SeqCst); 112 | Ok::<_, String>(create_token(Some(Duration::from_secs(0)))) 113 | } 114 | 115 | // Should fetch initial token 116 | let _ = cache.get_or_insert_with(get_token).await.unwrap(); 117 | assert_eq!(COUNTER.load(Ordering::SeqCst), 1); 118 | 119 | tokio::time::sleep(Duration::from_millis(2)).await; 120 | 121 | // Token is expired, so should fetch again 122 | let _ = cache.get_or_insert_with(get_token).await.unwrap(); 123 | assert_eq!(COUNTER.load(Ordering::SeqCst), 2); 124 | } 125 | 126 | #[tokio::test] 127 | async fn test_min_ttl_causes_refresh() { 128 | let cache = TokenCache { 129 | cache: Default::default(), 130 | min_ttl: Duration::from_secs(1), 131 | fetch_backoff: Duration::from_millis(1), 132 | }; 133 | 134 | static COUNTER: AtomicU32 = AtomicU32::new(0); 135 | 136 | async fn get_token() -> Result, String> { 137 | COUNTER.fetch_add(1, Ordering::SeqCst); 138 | Ok::<_, String>(create_token(Some(Duration::from_millis(100)))) 139 | } 140 | 141 | // Initial 
fetch 142 | let _ = cache.get_or_insert_with(get_token).await.unwrap(); 143 | assert_eq!(COUNTER.load(Ordering::SeqCst), 1); 144 | 145 | // Should not fetch again since not expired and within fetch_backoff 146 | let _ = cache.get_or_insert_with(get_token).await.unwrap(); 147 | assert_eq!(COUNTER.load(Ordering::SeqCst), 1); 148 | 149 | tokio::time::sleep(Duration::from_millis(2)).await; 150 | 151 | // Should fetch, since we've passed fetch_backoff 152 | let _ = cache.get_or_insert_with(get_token).await.unwrap(); 153 | assert_eq!(COUNTER.load(Ordering::SeqCst), 2); 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/client/backoff.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use rand::{prelude::*, rng}; 19 | use std::time::Duration; 20 | 21 | /// Exponential backoff with decorrelated jitter algorithm 22 | /// 23 | /// The first backoff will always be `init_backoff`. 
24 | /// 25 | /// Subsequent backoffs will pick a random value between `init_backoff` and 26 | /// `base * previous` where `previous` is the duration of the previous backoff 27 | /// 28 | /// See 29 | #[allow(missing_copy_implementations)] 30 | #[derive(Debug, Clone)] 31 | pub struct BackoffConfig { 32 | /// The initial backoff duration 33 | pub init_backoff: Duration, 34 | /// The maximum backoff duration 35 | pub max_backoff: Duration, 36 | /// The multiplier to use for the next backoff duration 37 | pub base: f64, 38 | } 39 | 40 | impl Default for BackoffConfig { 41 | fn default() -> Self { 42 | Self { 43 | init_backoff: Duration::from_millis(100), 44 | max_backoff: Duration::from_secs(15), 45 | base: 2., 46 | } 47 | } 48 | } 49 | 50 | /// [`Backoff`] can be created from a [`BackoffConfig`] 51 | /// 52 | /// Consecutive calls to [`Backoff::next`] will return the next backoff interval 53 | /// 54 | pub(crate) struct Backoff { 55 | init_backoff: f64, 56 | next_backoff_secs: f64, 57 | max_backoff_secs: f64, 58 | base: f64, 59 | rng: Option>, 60 | } 61 | 62 | impl std::fmt::Debug for Backoff { 63 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 64 | f.debug_struct("Backoff") 65 | .field("init_backoff", &self.init_backoff) 66 | .field("next_backoff_secs", &self.next_backoff_secs) 67 | .field("max_backoff_secs", &self.max_backoff_secs) 68 | .field("base", &self.base) 69 | .finish() 70 | } 71 | } 72 | 73 | impl Backoff { 74 | /// Create a new [`Backoff`] from the provided [`BackoffConfig`] 75 | pub(crate) fn new(config: &BackoffConfig) -> Self { 76 | Self::new_with_rng(config, None) 77 | } 78 | 79 | /// Creates a new `Backoff` with the optional `rng` 80 | /// 81 | /// Used [`rand::rng()`] if no rng provided 82 | pub(crate) fn new_with_rng( 83 | config: &BackoffConfig, 84 | rng: Option>, 85 | ) -> Self { 86 | let init_backoff = config.init_backoff.as_secs_f64(); 87 | Self { 88 | init_backoff, 89 | next_backoff_secs: init_backoff, 90 | 
max_backoff_secs: config.max_backoff.as_secs_f64(), 91 | base: config.base, 92 | rng, 93 | } 94 | } 95 | 96 | /// Returns the next backoff duration to wait for 97 | pub(crate) fn next(&mut self) -> Duration { 98 | let range = self.init_backoff..(self.next_backoff_secs * self.base); 99 | 100 | let rand_backoff = match self.rng.as_mut() { 101 | Some(rng) => rng.random_range(range), 102 | None => rng().random_range(range), 103 | }; 104 | 105 | let next_backoff = self.max_backoff_secs.min(rand_backoff); 106 | Duration::from_secs_f64(std::mem::replace(&mut self.next_backoff_secs, next_backoff)) 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use super::*; 113 | use rand::rand_core::impls::fill_bytes_via_next; 114 | 115 | struct FixedRng(u64); 116 | 117 | impl RngCore for FixedRng { 118 | fn next_u32(&mut self) -> u32 { 119 | self.0 as _ 120 | } 121 | 122 | fn next_u64(&mut self) -> u64 { 123 | self.0 124 | } 125 | 126 | fn fill_bytes(&mut self, dst: &mut [u8]) { 127 | fill_bytes_via_next(self, dst) 128 | } 129 | } 130 | 131 | #[test] 132 | fn test_backoff() { 133 | let init_backoff_secs = 1.; 134 | let max_backoff_secs = 500.; 135 | let base = 3.; 136 | 137 | let config = BackoffConfig { 138 | init_backoff: Duration::from_secs_f64(init_backoff_secs), 139 | max_backoff: Duration::from_secs_f64(max_backoff_secs), 140 | base, 141 | }; 142 | 143 | let assert_fuzzy_eq = |a: f64, b: f64| assert!((b - a).abs() < 0.0001, "{a} != {b}"); 144 | 145 | // Create a static rng that takes the minimum of the range 146 | let rng = Box::new(FixedRng(0)); 147 | let mut backoff = Backoff::new_with_rng(&config, Some(rng)); 148 | 149 | for _ in 0..20 { 150 | assert_eq!(backoff.next().as_secs_f64(), init_backoff_secs); 151 | } 152 | 153 | // Create a static rng that takes the maximum of the range 154 | let rng = Box::new(FixedRng(u64::MAX)); 155 | let mut backoff = Backoff::new_with_rng(&config, Some(rng)); 156 | 157 | for i in 0..20 { 158 | let value = (base.powi(i) * 
init_backoff_secs).min(max_backoff_secs); 159 | assert_fuzzy_eq(backoff.next().as_secs_f64(), value); 160 | } 161 | 162 | // Create a static rng that takes the mid point of the range 163 | let rng = Box::new(FixedRng(u64::MAX / 2)); 164 | let mut backoff = Backoff::new_with_rng(&config, Some(rng)); 165 | 166 | let mut value = init_backoff_secs; 167 | for _ in 0..20 { 168 | assert_fuzzy_eq(backoff.next().as_secs_f64(), value); 169 | value = 170 | (init_backoff_secs + (value * base - init_backoff_secs) / 2.).min(max_backoff_secs); 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/path/parts.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use percent_encoding::{AsciiSet, CONTROLS, percent_encode}; 19 | use std::{ 20 | borrow::Cow, 21 | iter::{self, FusedIterator}, 22 | str::SplitTerminator, 23 | }; 24 | 25 | use crate::path::DELIMITER_BYTE; 26 | 27 | /// Error returned by [`PathPart::parse`] 28 | #[derive(Debug, thiserror::Error)] 29 | #[error( 30 | "Encountered illegal character sequence \"{}\" whilst parsing path segment \"{}\"", 31 | illegal, 32 | segment 33 | )] 34 | #[allow(missing_copy_implementations)] 35 | pub struct InvalidPart { 36 | segment: String, 37 | illegal: String, 38 | } 39 | 40 | /// The PathPart type exists to validate the directory/file names that form part 41 | /// of a path. 42 | /// 43 | /// A [`PathPart`] is guaranteed to: 44 | /// 45 | /// * Contain no ASCII control characters or `/` 46 | /// * Not be a relative path segment, i.e. `.` or `..` 47 | #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)] 48 | pub struct PathPart<'a> { 49 | pub(super) raw: Cow<'a, str>, 50 | } 51 | 52 | impl<'a> PathPart<'a> { 53 | /// Parse the provided path segment as a [`PathPart`] returning an error if invalid 54 | pub fn parse(segment: &'a str) -> Result { 55 | if segment == "." || segment == ".." { 56 | return Err(InvalidPart { 57 | segment: segment.to_string(), 58 | illegal: segment.to_string(), 59 | }); 60 | } 61 | 62 | for c in segment.chars() { 63 | if c.is_ascii_control() || c == '/' { 64 | return Err(InvalidPart { 65 | segment: segment.to_string(), 66 | // This is correct as only single byte characters up to this point 67 | illegal: c.to_string(), 68 | }); 69 | } 70 | } 71 | 72 | Ok(Self { 73 | raw: segment.into(), 74 | }) 75 | } 76 | } 77 | 78 | /// Characters we want to encode. 
79 | const INVALID: &AsciiSet = &CONTROLS 80 | // The delimiter we are reserving for internal hierarchy 81 | .add(DELIMITER_BYTE) 82 | // Characters AWS recommends avoiding for object keys 83 | // https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html 84 | .add(b'\\') 85 | .add(b'{') 86 | .add(b'^') 87 | .add(b'}') 88 | .add(b'%') 89 | .add(b'`') 90 | .add(b']') 91 | .add(b'"') // " <-- my editor is confused about double quotes within single quotes 92 | .add(b'>') 93 | .add(b'[') 94 | .add(b'~') 95 | .add(b'<') 96 | .add(b'#') 97 | .add(b'|') 98 | // Characters Google Cloud Storage recommends avoiding for object names 99 | // https://cloud.google.com/storage/docs/naming-objects 100 | .add(b'\r') 101 | .add(b'\n') 102 | .add(b'*') 103 | .add(b'?'); 104 | 105 | impl<'a> From<&'a [u8]> for PathPart<'a> { 106 | fn from(v: &'a [u8]) -> Self { 107 | let inner = match v { 108 | // We don't want to encode `.` generally, but we do want to disallow parts of paths 109 | // to be equal to `.` or `..` to prevent file system traversal shenanigans. 110 | b"." => "%2E".into(), 111 | b".." => "%2E%2E".into(), 112 | other => percent_encode(other, INVALID).into(), 113 | }; 114 | Self { raw: inner } 115 | } 116 | } 117 | 118 | impl<'a> From<&'a str> for PathPart<'a> { 119 | fn from(v: &'a str) -> Self { 120 | Self::from(v.as_bytes()) 121 | } 122 | } 123 | 124 | impl From for PathPart<'static> { 125 | fn from(s: String) -> Self { 126 | Self { 127 | raw: Cow::Owned(PathPart::from(s.as_str()).raw.into_owned()), 128 | } 129 | } 130 | } 131 | 132 | impl AsRef for PathPart<'_> { 133 | fn as_ref(&self) -> &str { 134 | self.raw.as_ref() 135 | } 136 | } 137 | 138 | /// See [`Path::parts`](super::Path::parts) 139 | #[derive(Debug, Clone)] 140 | pub struct PathParts<'a>(iter::Map, fn(&str) -> PathPart<'_>>); 141 | 142 | impl<'a> PathParts<'a> { 143 | /// Create an iterator over the parts of the provided raw [`Path`](super::Path). 
144 | pub(super) fn new(raw: &'a str) -> Self { 145 | Self( 146 | raw.split_terminator(super::DELIMITER_CHAR) 147 | .map(|s| PathPart { raw: s.into() }), 148 | ) 149 | } 150 | } 151 | 152 | impl<'a> Iterator for PathParts<'a> { 153 | type Item = PathPart<'a>; 154 | 155 | fn next(&mut self) -> Option { 156 | self.0.next() 157 | } 158 | } 159 | 160 | impl<'a> FusedIterator for PathParts<'a> {} 161 | 162 | impl<'a> DoubleEndedIterator for PathParts<'a> { 163 | fn next_back(&mut self) -> Option { 164 | self.0.next_back() 165 | } 166 | } 167 | 168 | #[cfg(test)] 169 | mod tests { 170 | use super::*; 171 | 172 | #[test] 173 | fn path_part_delimiter_gets_encoded() { 174 | let part: PathPart<'_> = "foo/bar".into(); 175 | assert_eq!(part.raw, "foo%2Fbar"); 176 | } 177 | 178 | #[test] 179 | fn path_part_given_already_encoded_string() { 180 | let part: PathPart<'_> = "foo%2Fbar".into(); 181 | assert_eq!(part.raw, "foo%252Fbar"); 182 | } 183 | 184 | #[test] 185 | fn path_part_cant_be_one_dot() { 186 | let part: PathPart<'_> = ".".into(); 187 | assert_eq!(part.raw, "%2E"); 188 | } 189 | 190 | #[test] 191 | fn path_part_cant_be_two_dots() { 192 | let part: PathPart<'_> = "..".into(); 193 | assert_eq!(part.raw, "%2E%2E"); 194 | } 195 | 196 | #[test] 197 | fn path_part_parse() { 198 | PathPart::parse("foo").unwrap(); 199 | PathPart::parse("foo/bar").unwrap_err(); 200 | 201 | // Test percent-encoded path 202 | PathPart::parse("foo%2Fbar").unwrap(); 203 | PathPart::parse("L%3ABC.parquet").unwrap(); 204 | 205 | // Test path containing bad escape sequence 206 | PathPart::parse("%Z").unwrap(); 207 | PathPart::parse("%%").unwrap(); 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/client/http/body.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::client::{HttpError, HttpErrorKind}; 19 | use crate::{PutPayload, collect_bytes}; 20 | use bytes::Bytes; 21 | use futures::StreamExt; 22 | use futures::stream::BoxStream; 23 | use http_body_util::combinators::BoxBody; 24 | use http_body_util::{BodyExt, Full}; 25 | use hyper::body::{Body, Frame, SizeHint}; 26 | use std::pin::Pin; 27 | use std::task::{Context, Poll}; 28 | 29 | /// An HTTP Request 30 | pub type HttpRequest = http::Request; 31 | 32 | /// The [`Body`] of an [`HttpRequest`] 33 | #[derive(Debug, Clone)] 34 | pub struct HttpRequestBody(Inner); 35 | 36 | impl HttpRequestBody { 37 | /// An empty [`HttpRequestBody`] 38 | pub fn empty() -> Self { 39 | Self(Inner::Bytes(Bytes::new())) 40 | } 41 | 42 | #[cfg(not(target_arch = "wasm32"))] 43 | pub(crate) fn into_reqwest(self) -> reqwest::Body { 44 | match self.0 { 45 | Inner::Bytes(b) => b.into(), 46 | Inner::PutPayload(_, payload) => reqwest::Body::wrap_stream(futures::stream::iter( 47 | payload.into_iter().map(Ok::<_, HttpError>), 48 | )), 49 | } 50 | } 51 | 52 | #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] 53 | pub(crate) fn into_reqwest(self) -> reqwest::Body { 54 | match self.0 { 55 | Inner::Bytes(b) => b.into(), 56 | 
Inner::PutPayload(_, payload) => Bytes::from(payload).into(), 57 | } 58 | } 59 | 60 | /// Returns true if this body is empty 61 | pub fn is_empty(&self) -> bool { 62 | match &self.0 { 63 | Inner::Bytes(x) => x.is_empty(), 64 | Inner::PutPayload(_, x) => x.iter().any(|x| !x.is_empty()), 65 | } 66 | } 67 | 68 | /// Returns the total length of the [`Bytes`] in this body 69 | pub fn content_length(&self) -> usize { 70 | match &self.0 { 71 | Inner::Bytes(x) => x.len(), 72 | Inner::PutPayload(_, x) => x.content_length(), 73 | } 74 | } 75 | 76 | /// If this body consists of a single contiguous [`Bytes`], returns it 77 | pub fn as_bytes(&self) -> Option<&Bytes> { 78 | match &self.0 { 79 | Inner::Bytes(x) => Some(x), 80 | _ => None, 81 | } 82 | } 83 | } 84 | 85 | impl From for HttpRequestBody { 86 | fn from(value: Bytes) -> Self { 87 | Self(Inner::Bytes(value)) 88 | } 89 | } 90 | 91 | impl From> for HttpRequestBody { 92 | fn from(value: Vec) -> Self { 93 | Self(Inner::Bytes(value.into())) 94 | } 95 | } 96 | 97 | impl From for HttpRequestBody { 98 | fn from(value: String) -> Self { 99 | Self(Inner::Bytes(value.into())) 100 | } 101 | } 102 | 103 | impl From for HttpRequestBody { 104 | fn from(value: PutPayload) -> Self { 105 | Self(Inner::PutPayload(0, value)) 106 | } 107 | } 108 | 109 | #[derive(Debug, Clone)] 110 | enum Inner { 111 | Bytes(Bytes), 112 | PutPayload(usize, PutPayload), 113 | } 114 | 115 | impl Body for HttpRequestBody { 116 | type Data = Bytes; 117 | type Error = HttpError; 118 | 119 | fn poll_frame( 120 | mut self: Pin<&mut Self>, 121 | _cx: &mut Context<'_>, 122 | ) -> Poll, Self::Error>>> { 123 | Poll::Ready(match &mut self.0 { 124 | Inner::Bytes(bytes) => { 125 | let out = bytes.split_off(0); 126 | if out.is_empty() { 127 | None 128 | } else { 129 | Some(Ok(Frame::data(out))) 130 | } 131 | } 132 | Inner::PutPayload(offset, payload) => { 133 | let slice = payload.as_ref(); 134 | if *offset == slice.len() { 135 | None 136 | } else { 137 | 
Some(Ok(Frame::data( 138 | slice[std::mem::replace(offset, *offset + 1)].clone(), 139 | ))) 140 | } 141 | } 142 | }) 143 | } 144 | 145 | fn is_end_stream(&self) -> bool { 146 | match self.0 { 147 | Inner::Bytes(ref bytes) => bytes.is_empty(), 148 | Inner::PutPayload(offset, ref body) => offset == body.as_ref().len(), 149 | } 150 | } 151 | 152 | fn size_hint(&self) -> SizeHint { 153 | match self.0 { 154 | Inner::Bytes(ref bytes) => SizeHint::with_exact(bytes.len() as u64), 155 | Inner::PutPayload(offset, ref payload) => { 156 | let iter = payload.as_ref().iter().skip(offset); 157 | SizeHint::with_exact(iter.map(|x| x.len() as u64).sum()) 158 | } 159 | } 160 | } 161 | } 162 | 163 | /// An HTTP response 164 | pub type HttpResponse = http::Response; 165 | 166 | /// The body of an [`HttpResponse`] 167 | #[derive(Debug)] 168 | pub struct HttpResponseBody(BoxBody); 169 | 170 | impl HttpResponseBody { 171 | /// Create an [`HttpResponseBody`] from the provided [`Body`] 172 | /// 173 | /// Note: [`BodyExt::map_err`] can be used to alter error variants 174 | pub fn new(body: B) -> Self 175 | where 176 | B: Body + Send + Sync + 'static, 177 | { 178 | Self(BoxBody::new(body)) 179 | } 180 | 181 | /// Collects this response into a [`Bytes`] 182 | pub async fn bytes(self) -> Result { 183 | let size_hint = self.0.size_hint().lower(); 184 | let s = self.0.into_data_stream(); 185 | collect_bytes(s, Some(size_hint)).await 186 | } 187 | 188 | /// Returns a stream of this response data 189 | pub fn bytes_stream(self) -> BoxStream<'static, Result> { 190 | self.0.into_data_stream().boxed() 191 | } 192 | 193 | /// Returns the response as a [`String`] 194 | pub(crate) async fn text(self) -> Result { 195 | let b = self.bytes().await?; 196 | String::from_utf8(b.into()).map_err(|e| HttpError::new(HttpErrorKind::Decode, e)) 197 | } 198 | 199 | #[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] 200 | pub(crate) async fn json(self) -> Result { 201 | let b = self.bytes().await?; 202 
| serde_json::from_slice(&b).map_err(|e| HttpError::new(HttpErrorKind::Decode, e)) 203 | } 204 | } 205 | 206 | impl Body for HttpResponseBody { 207 | type Data = Bytes; 208 | type Error = HttpError; 209 | 210 | fn poll_frame( 211 | mut self: Pin<&mut Self>, 212 | cx: &mut Context<'_>, 213 | ) -> Poll, Self::Error>>> { 214 | Pin::new(&mut self.0).poll_frame(cx) 215 | } 216 | 217 | fn is_end_stream(&self) -> bool { 218 | self.0.is_end_stream() 219 | } 220 | 221 | fn size_hint(&self) -> SizeHint { 222 | self.0.size_hint() 223 | } 224 | } 225 | 226 | impl From for HttpResponseBody { 227 | fn from(value: Bytes) -> Self { 228 | Self::new(Full::new(value).map_err(|e| match e {})) 229 | } 230 | } 231 | 232 | impl From> for HttpResponseBody { 233 | fn from(value: Vec) -> Self { 234 | Bytes::from(value).into() 235 | } 236 | } 237 | 238 | impl From for HttpResponseBody { 239 | fn from(value: String) -> Self { 240 | Bytes::from(value).into() 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Development instructions 21 | 22 | ## Running Tests 23 | 24 | Tests can be run using `cargo` 25 | 26 | ```shell 27 | cargo test 28 | ``` 29 | 30 | ## Running Integration Tests 31 | 32 | By default, integration tests are not run. 
To run them you will need to set `TEST_INTEGRATION=1` and then provide the 33 | necessary configuration for that object store 34 | 35 | ### AWS 36 | 37 | To test the S3 integration against [localstack](https://localstack.cloud/) 38 | 39 | First start up a container running localstack 40 | 41 | ```shell 42 | LOCALSTACK_VERSION=sha256:a0b79cb2430f1818de2c66ce89d41bba40f5a1823410f5a7eaf3494b692eed97 43 | podman run -d -p 4566:4566 localstack/localstack@$LOCALSTACK_VERSION 44 | podman run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2 45 | ``` 46 | 47 | Setup environment 48 | 49 | ```shell 50 | export TEST_INTEGRATION=1 51 | export AWS_DEFAULT_REGION=us-east-1 52 | export AWS_ACCESS_KEY_ID=test 53 | export AWS_SECRET_ACCESS_KEY=test 54 | export AWS_ENDPOINT=http://localhost:4566 55 | export AWS_ALLOW_HTTP=true 56 | export AWS_BUCKET_NAME=test-bucket 57 | ``` 58 | 59 | Create a bucket using the AWS CLI 60 | 61 | ```shell 62 | podman run --net=host --env-host amazon/aws-cli --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket 63 | ``` 64 | 65 | Or directly with: 66 | 67 | ```shell 68 | aws s3 mb s3://test-bucket --endpoint-url=http://localhost:4566 69 | aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket-for-spawn 70 | aws --endpoint-url=http://localhost:4566 dynamodb create-table --table-name test-table --key-schema AttributeName=path,KeyType=HASH AttributeName=etag,KeyType=RANGE --attribute-definitions AttributeName=path,AttributeType=S AttributeName=etag,AttributeType=S --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 71 | ``` 72 | 73 | Run tests 74 | 75 | ```shell 76 | cargo test --features aws 77 | ``` 78 | 79 | #### Encryption tests 80 | 81 | To create an encryption key for the tests, you can run the following command: 82 | 83 | ```shell 84 | export AWS_SSE_KMS_KEY_ID=$(aws --endpoint-url=http://localhost:4566 \ 85 | kms create-key --description "test key" | 86 | jq -r '.KeyMetadata.KeyId') 87 | ``` 88 | 89 | To 
run integration tests with encryption, you can set the following environment variables: 90 | 91 | ```shell 92 | export AWS_SERVER_SIDE_ENCRYPTION=aws:kms 93 | export AWS_SSE_BUCKET_KEY=false 94 | cargo test --features aws 95 | ``` 96 | 97 | As well as: 98 | 99 | ```shell 100 | unset AWS_SSE_BUCKET_KEY 101 | export AWS_SERVER_SIDE_ENCRYPTION=aws:kms:dsse 102 | cargo test --features aws 103 | ``` 104 | 105 | #### SSE-C Encryption tests 106 | 107 | Unfortunately, localstack does not support SSE-C encryption (https://github.com/localstack/localstack/issues/11356). 108 | 109 | We will use [MinIO](https://min.io/docs/minio/container/operations/server-side-encryption.html) to test SSE-C encryption. 110 | 111 | First, create a self-signed certificate to enable HTTPS for MinIO, as SSE-C requires HTTPS. 112 | 113 | ```shell 114 | mkdir ~/certs 115 | cd ~/certs 116 | openssl genpkey -algorithm RSA -out private.key 117 | openssl req -new -key private.key -out request.csr -subj "/C=US/ST=State/L=City/O=Organization/OU=Unit/CN=example.com/emailAddress=email@example.com" 118 | openssl x509 -req -days 365 -in request.csr -signkey private.key -out public.crt 119 | rm request.csr 120 | ``` 121 | 122 | Second, start MinIO with the self-signed certificate. 123 | 124 | ```shell 125 | docker run -d \ 126 | -p 9000:9000 \ 127 | --name minio \ 128 | -v ${HOME}/certs:/root/.minio/certs \ 129 | -e "MINIO_ROOT_USER=minio" \ 130 | -e "MINIO_ROOT_PASSWORD=minio123" \ 131 | minio/minio server /data 132 | ``` 133 | 134 | Create a test bucket. 135 | 136 | ```shell 137 | export AWS_BUCKET_NAME=test-bucket 138 | export AWS_ACCESS_KEY_ID=minio 139 | export AWS_SECRET_ACCESS_KEY=minio123 140 | export AWS_ENDPOINT=https://localhost:9000 141 | aws s3 mb s3://test-bucket --endpoint-url=https://localhost:9000 --no-verify-ssl 142 | ``` 143 | 144 | Run the tests. 
The real test is `test_s3_ssec_encryption_with_minio()` 145 | 146 | ```shell 147 | export TEST_S3_SSEC_ENCRYPTION=1 148 | cargo test --features aws --package object_store --lib aws::tests::test_s3_ssec_encryption_with_minio -- --exact --nocapture 149 | ``` 150 | 151 | ### Azure 152 | 153 | To test the Azure integration 154 | against [azurite](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=visual-studio) 155 | 156 | Startup azurite 157 | 158 | ```shell 159 | podman run -p 10000:10000 -p 10001:10001 -p 10002:10002 mcr.microsoft.com/azure-storage/azurite 160 | ``` 161 | 162 | Create a bucket 163 | 164 | ```shell 165 | podman run --net=host mcr.microsoft.com/azure-cli az storage container create -n test-bucket --connection-string 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' 166 | ``` 167 | 168 | Run tests 169 | 170 | ```shell 171 | AZURE_USE_EMULATOR=1 \ 172 | TEST_INTEGRATION=1 \ 173 | AZURE_CONTAINER_NAME=test-bucket \ 174 | AZURE_STORAGE_ACCOUNT_NAME=devstoreaccount1 \ 175 | AZURE_STORAGE_ACCESS_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== \ 176 | AZURE_ENDPOINT=http://127.0.0.1:10000/devstoreaccount1 \ 177 | AZURE_ALLOW_HTTP=true \ 178 | cargo test --features azure 179 | ``` 180 | 181 | ### GCP 182 | 183 | To test the GCS integration, we use [Fake GCS Server](https://github.com/fsouza/fake-gcs-server) 184 | 185 | Startup the fake server: 186 | 187 | ```shell 188 | docker run -p 4443:4443 tustvold/fake-gcs-server -scheme http 189 | ``` 190 | 191 | Configure the account: 192 | ```shell 193 | curl -v -X POST --data-binary '{"name":"test-bucket"}' -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b" 194 | echo '{"gcs_base_url": 
"http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": ""}' > /tmp/gcs.json 195 | ``` 196 | 197 | Now run the tests: 198 | ```shell 199 | TEST_INTEGRATION=1 \ 200 | OBJECT_STORE_BUCKET=test-bucket \ 201 | GOOGLE_SERVICE_ACCOUNT=/tmp/gcs.json \ 202 | cargo test -p object_store --features=gcp 203 | ``` 204 | 205 | # Deprecation Guidelines 206 | 207 | Minor releases may deprecate, but not remove APIs. Deprecating APIs allows 208 | downstream Rust programs to still compile, but generate compiler warnings. This 209 | gives downstream crates time to migrate prior to API removal. 210 | 211 | To deprecate an API: 212 | 213 | - Mark the API as deprecated using `#[deprecated]` and specify the exact object_store version in which it was deprecated 214 | - Concisely describe the preferred API to help the user transition 215 | 216 | The deprecated version is the next version which will be released (please 217 | consult the list above). To mark the API as deprecated, use the 218 | `#[deprecated(since = "...", note = "...")]` attribute. 219 | 220 | For example 221 | 222 | ```rust 223 | #[deprecated(since = "0.11.0", note = "Use `date_part` instead")] 224 | ``` 225 | 226 | In general, deprecated APIs will remain in the codebase for at least two major releases after 227 | they were deprecated (typically between 6 - 9 months later). For example, an API 228 | deprecated in `0.10.0` can be removed in `0.13.0` (or later). Deprecated APIs 229 | may be removed earlier or later than these guidelines at the discretion of the 230 | maintainers. 231 | -------------------------------------------------------------------------------- /dev/release/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Release Process 21 | 22 | ## Overview 23 | 24 | This file documents the release process for the `object_store` crate. 
We release a new version of `object_store` according to the schedule listed in
the [main README.md]

[main README.md]: https://github.com/apache/arrow-rs-object-store?tab=readme-ov-file#release-schedule

As we are still in an early phase, we use the 0.x version scheme. If any code has
been merged to main that has a breaking API change, as defined in [Rust RFC 1105],
the minor version number is incremented (e.g. `0.3.0` to `0.4.0`).
Otherwise the patch version is incremented (e.g. `0.3.0` to `0.3.1`).

[Rust RFC 1105]: https://github.com/rust-lang/rfcs/blob/master/text/1105-api-evolution.md

# Release Mechanics

## Process Overview

As part of the Apache governance model, official releases consist of
signed source tarballs approved by the PMC.

We then use the code in the approved source tarball to release to
crates.io, the Rust ecosystem's package manager.

We create a `CHANGELOG.md` so our users know what has been changed between releases.

The CHANGELOG is created automatically using
[update_change_log.sh](https://github.com/apache/arrow-rs-object-store/blob/main/dev/release/update_change_log.sh)

This script creates a changelog using github issues and the
labels associated with them.

## Prepare CHANGELOG and version:

Now prepare a PR to update `CHANGELOG.md` and versions on `main` to reflect the planned release.

See [#437] for an example.

[#437]: https://github.com/apache/arrow-rs-object-store/pull/437

```bash
git checkout main
git pull
git checkout -b <RELEASE_BRANCH>

# Update versions. Make sure to run it before the next step since we do not want CHANGELOG-old.md affected.
sed -i '' -e 's/0.11.0/0.11.1/g' `find .
-name 'Cargo.toml' -or -name '*.md' | grep -v CHANGELOG`
git commit -a -m 'Update version'

# ensure your github token is available
export CHANGELOG_GITHUB_TOKEN=<TOKEN>

# manually edit ./dev/release/update_change_log.sh to reflect the release version
# create the changelog
./dev/release/update_change_log.sh

# review change log / and edit associated issues and labels if needed, rerun update_change_log.sh

# Commit changes
git commit -a -m 'Create changelog'

# push changes to fork and create a PR to main
git push
```

Note that when reviewing the change log, rather than editing the
`CHANGELOG.md`, it is preferred to update the issues and their labels
(e.g. add `invalid` label to exclude them from release notes)

Merge this PR to `main` prior to the next step.

## Prepare release candidate tarball

After you have merged the updates to the `CHANGELOG` and version,
create a release candidate using the following steps. Note you need to
be a committer to run these scripts as they upload to the apache `svn`
distribution servers.

### Create git tag for the release:

While the official release artifact is a signed tarball, we also tag the commit it was created from, for convenience and code archaeology.

Use a string such as `v0.4.0` as the `<tag>`.

Create and push the tag thusly:

```shell
git fetch apache
git tag <tag> apache/main
# push tag to apache
git push apache <tag>
```

### Pick a Release Candidate (RC) number

Pick numbers in sequential order, with `1` for `rc1`, `2` for `rc2`, etc.

### Create, sign, and upload tarball

Run `create-tarball.sh` with the `<tag>` and `<rc>` you found in previous steps.
124 | 125 | ```shell 126 | ./dev/release/create-tarball.sh 0.11.1 1 127 | ``` 128 | 129 | The `create-tarball.sh` script 130 | 131 | 1. creates and uploads a release candidate tarball to the [arrow 132 | dev](https://dist.apache.org/repos/dist/dev/arrow) location on the 133 | apache distribution svn server 134 | 135 | 2. provide you an email template to 136 | send to dev@arrow.apache.org for release voting. 137 | 138 | ### Vote on Release Candidate tarball 139 | 140 | Send an email, based on the output from the script to dev@arrow.apache.org. The email should look like 141 | 142 | ``` 143 | Draft email for dev@arrow.apache.org mailing list 144 | 145 | --------------------------------------------------------- 146 | To: dev@arrow.apache.org 147 | Subject: [VOTE][RUST] Release Apache Arrow Rust Object Store 0.11.1 RC1 148 | 149 | Hi, 150 | 151 | I would like to propose a release of Apache Arrow Rust Object 152 | Store Implementation, version 0.11.1. 153 | 154 | This release candidate is based on commit: b945b15de9085f5961a478d4f35b0c5c3427e248 [1] 155 | 156 | The proposed release tarball and signatures are hosted at [2]. 157 | 158 | The changelog is located at [3]. 159 | 160 | Please download, verify checksums and signatures, run the unit tests, 161 | and vote on the release. There is a script [4] that automates some of 162 | the verification. 163 | 164 | The vote will be open for at least 72 hours. 165 | 166 | [ ] +1 Release this as Apache Arrow Rust Object Store 167 | [ ] +0 168 | [ ] -1 Do not release this as Apache Arrow Rust Object Store because... 
169 | 170 | [1]: https://github.com/apache/arrow-rs-object-store/tree/b945b15de9085f5961a478d4f35b0c5c3427e248 171 | [2]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-object-store-rs-0.11.1-rc1/ 172 | [3]: https://github.com/apache/arrow-rs-object-store/blob/b945b15de9085f5961a478d4f35b0c5c3427e248/CHANGELOG.md 173 | [4]: https://github.com/apache/arrow-rs-object-store/blob/main/dev/release/verify-release-candidate.sh 174 | ``` 175 | 176 | For the release to become "official" it needs at least three Apache Arrow PMC members to vote +1 on it. 177 | 178 | ## Verifying release candidates 179 | 180 | The `dev/release/verify-release-candidate.sh` script can assist in the verification process. Run it like: 181 | 182 | ``` 183 | ./dev/release/verify-release-candidate.sh 0.11.0 1 184 | ``` 185 | 186 | #### If the release is not approved 187 | 188 | If the release is not approved, fix whatever the problem is and try again with the next RC number 189 | 190 | ### If the release is approved, 191 | 192 | Move tarball to the release location in SVN, e.g. https://dist.apache.org/repos/dist/release/arrow/apache-arrow-object-store-rs-4.1.0-rc4/, using the `release-tarball.sh` script: 193 | 194 | ```shell 195 | ./dev/release/release-tarball.sh 4.1.0 2 196 | ``` 197 | 198 | Congratulations! The release is now official! 199 | 200 | ### Publish on Crates.io 201 | 202 | Only approved releases of the tarball should be published to 203 | crates.io, in order to conform to Apache Software Foundation 204 | governance standards. 205 | 206 | An Arrow committer can publish this crate after an official project release has 207 | been made to crates.io using the following instructions. 208 | 209 | Follow [these 210 | instructions](https://doc.rust-lang.org/cargo/reference/publishing.html) to 211 | create an account and login to crates.io before asking to be added as an owner 212 | of the [object store crate](https://crates.io/crates/object_store). 
213 | 214 | Download and unpack the official release tarball 215 | 216 | Verify that the Cargo.toml in the tarball contains the correct version 217 | (e.g. `version = "0.11.0"`) and then publish the crate with the 218 | following commands 219 | 220 | ```shell 221 | cargo publish 222 | ``` 223 | -------------------------------------------------------------------------------- /src/attributes.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::borrow::Cow; 19 | use std::collections::HashMap; 20 | use std::ops::Deref; 21 | 22 | /// Additional object attribute types 23 | #[non_exhaustive] 24 | #[derive(Debug, Hash, Eq, PartialEq, Clone)] 25 | pub enum Attribute { 26 | /// Specifies how the object should be handled by a browser 27 | /// 28 | /// See [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition) 29 | ContentDisposition, 30 | /// Specifies the encodings applied to the object 31 | /// 32 | /// See [Content-Encoding](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding) 33 | ContentEncoding, 34 | /// Specifies the language of the object 35 | /// 36 | /// See [Content-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language) 37 | ContentLanguage, 38 | /// Specifies the MIME type of the object 39 | /// 40 | /// This takes precedence over any [ClientOptions](crate::ClientOptions) configuration 41 | /// 42 | /// See [Content-Type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type) 43 | ContentType, 44 | /// Overrides cache control policy of the object 45 | /// 46 | /// See [Cache-Control](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control) 47 | CacheControl, 48 | /// Specifies the storage class of the object. 49 | /// 50 | /// See [AWS](https://aws.amazon.com/s3/storage-classes/), 51 | /// [GCP](https://cloud.google.com/storage/docs/storage-classes), and 52 | /// [Azure](https://learn.microsoft.com/en-us/rest/api/storageservices/set-blob-tier). 
53 | /// `StorageClass` is used as the name for this attribute because 2 of the 3 storage providers 54 | /// use that name 55 | StorageClass, 56 | /// Specifies a user-defined metadata field for the object 57 | /// 58 | /// The String is a user-defined key 59 | Metadata(Cow<'static, str>), 60 | } 61 | 62 | /// The value of an [`Attribute`] 63 | /// 64 | /// Provides efficient conversion from both static and owned strings 65 | /// 66 | /// ``` 67 | /// # use object_store::AttributeValue; 68 | /// // Can use static strings without needing an allocation 69 | /// let value = AttributeValue::from("bar"); 70 | /// // Can also store owned strings 71 | /// let value = AttributeValue::from("foo".to_string()); 72 | /// ``` 73 | #[derive(Debug, Hash, Eq, PartialEq, Clone)] 74 | pub struct AttributeValue(Cow<'static, str>); 75 | 76 | impl AsRef for AttributeValue { 77 | fn as_ref(&self) -> &str { 78 | &self.0 79 | } 80 | } 81 | 82 | impl From<&'static str> for AttributeValue { 83 | fn from(value: &'static str) -> Self { 84 | Self(Cow::Borrowed(value)) 85 | } 86 | } 87 | 88 | impl From for AttributeValue { 89 | fn from(value: String) -> Self { 90 | Self(Cow::Owned(value)) 91 | } 92 | } 93 | 94 | impl Deref for AttributeValue { 95 | type Target = str; 96 | 97 | fn deref(&self) -> &Self::Target { 98 | self.0.as_ref() 99 | } 100 | } 101 | 102 | /// Additional attributes of an object 103 | /// 104 | /// Attributes can be specified in [PutOptions](crate::PutOptions) and retrieved 105 | /// from APIs returning [GetResult](crate::GetResult). 
106 | /// 107 | /// Unlike [`ObjectMeta`](crate::ObjectMeta), [`Attributes`] are not returned by 108 | /// listing APIs 109 | #[derive(Debug, Default, Eq, PartialEq, Clone)] 110 | pub struct Attributes(HashMap); 111 | 112 | impl Attributes { 113 | /// Create a new empty [`Attributes`] 114 | pub fn new() -> Self { 115 | Self::default() 116 | } 117 | 118 | /// Create a new [`Attributes`] with space for `capacity` [`Attribute`] 119 | pub fn with_capacity(capacity: usize) -> Self { 120 | Self(HashMap::with_capacity(capacity)) 121 | } 122 | 123 | /// Insert a new [`Attribute`], [`AttributeValue`] pair 124 | /// 125 | /// Returns the previous value for `key` if any 126 | pub fn insert(&mut self, key: Attribute, value: AttributeValue) -> Option { 127 | self.0.insert(key, value) 128 | } 129 | 130 | /// Returns the [`AttributeValue`] for `key` if any 131 | pub fn get(&self, key: &Attribute) -> Option<&AttributeValue> { 132 | self.0.get(key) 133 | } 134 | 135 | /// Removes the [`AttributeValue`] for `key` if any 136 | pub fn remove(&mut self, key: &Attribute) -> Option { 137 | self.0.remove(key) 138 | } 139 | 140 | /// Returns an [`AttributesIter`] over this 141 | pub fn iter(&self) -> AttributesIter<'_> { 142 | self.into_iter() 143 | } 144 | 145 | /// Returns the number of [`Attribute`] in this collection 146 | #[inline] 147 | pub fn len(&self) -> usize { 148 | self.0.len() 149 | } 150 | 151 | /// Returns true if this contains no [`Attribute`] 152 | #[inline] 153 | pub fn is_empty(&self) -> bool { 154 | self.0.is_empty() 155 | } 156 | } 157 | 158 | impl FromIterator<(K, V)> for Attributes 159 | where 160 | K: Into, 161 | V: Into, 162 | { 163 | fn from_iter>(iter: T) -> Self { 164 | Self( 165 | iter.into_iter() 166 | .map(|(k, v)| (k.into(), v.into())) 167 | .collect(), 168 | ) 169 | } 170 | } 171 | 172 | impl<'a> IntoIterator for &'a Attributes { 173 | type Item = (&'a Attribute, &'a AttributeValue); 174 | type IntoIter = AttributesIter<'a>; 175 | 176 | fn into_iter(self) 
-> Self::IntoIter { 177 | AttributesIter(self.0.iter()) 178 | } 179 | } 180 | 181 | /// Iterator over [`Attributes`] 182 | #[derive(Debug)] 183 | pub struct AttributesIter<'a>(std::collections::hash_map::Iter<'a, Attribute, AttributeValue>); 184 | 185 | impl<'a> Iterator for AttributesIter<'a> { 186 | type Item = (&'a Attribute, &'a AttributeValue); 187 | 188 | fn next(&mut self) -> Option { 189 | self.0.next() 190 | } 191 | 192 | fn size_hint(&self) -> (usize, Option) { 193 | self.0.size_hint() 194 | } 195 | } 196 | 197 | #[cfg(test)] 198 | mod tests { 199 | use super::*; 200 | 201 | #[test] 202 | fn test_attributes_basic() { 203 | let mut attributes = Attributes::from_iter([ 204 | (Attribute::ContentDisposition, "inline"), 205 | (Attribute::ContentEncoding, "gzip"), 206 | (Attribute::ContentLanguage, "en-US"), 207 | (Attribute::ContentType, "test"), 208 | (Attribute::CacheControl, "control"), 209 | (Attribute::Metadata("key1".into()), "value1"), 210 | ]); 211 | 212 | assert!(!attributes.is_empty()); 213 | assert_eq!(attributes.len(), 6); 214 | 215 | assert_eq!( 216 | attributes.get(&Attribute::ContentType), 217 | Some(&"test".into()) 218 | ); 219 | 220 | let metav = "control".into(); 221 | assert_eq!(attributes.get(&Attribute::CacheControl), Some(&metav)); 222 | assert_eq!( 223 | attributes.insert(Attribute::CacheControl, "v1".into()), 224 | Some(metav) 225 | ); 226 | assert_eq!(attributes.len(), 6); 227 | 228 | assert_eq!( 229 | attributes.remove(&Attribute::CacheControl).unwrap(), 230 | "v1".into() 231 | ); 232 | assert_eq!(attributes.len(), 5); 233 | 234 | let metav: AttributeValue = "v2".into(); 235 | attributes.insert(Attribute::CacheControl, metav.clone()); 236 | assert_eq!(attributes.get(&Attribute::CacheControl), Some(&metav)); 237 | assert_eq!(attributes.len(), 6); 238 | 239 | assert_eq!( 240 | attributes.get(&Attribute::ContentDisposition), 241 | Some(&"inline".into()) 242 | ); 243 | assert_eq!( 244 | attributes.get(&Attribute::ContentEncoding), 245 | 
Some(&"gzip".into()) 246 | ); 247 | assert_eq!( 248 | attributes.get(&Attribute::ContentLanguage), 249 | Some(&"en-US".into()) 250 | ); 251 | assert_eq!( 252 | attributes.get(&Attribute::Metadata("key1".into())), 253 | Some(&"value1".into()) 254 | ); 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /src/aws/precondition.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::config::Parse; 19 | 20 | use itertools::Itertools; 21 | 22 | /// Configure how to provide [`CopyMode::Create`] for [`AmazonS3`]. 23 | /// 24 | /// [`CopyMode::Create`]: crate::CopyMode::Create 25 | /// [`AmazonS3`]: super::AmazonS3 26 | #[derive(Debug, Clone, PartialEq, Eq)] 27 | #[non_exhaustive] 28 | pub enum S3CopyIfNotExists { 29 | /// Some S3-compatible stores, such as Cloudflare R2, support copy if not exists 30 | /// semantics through custom headers. 
    ///
    /// If set, [`CopyMode::Create`] will perform a normal copy operation
    /// with the provided header pair, and expect the store to fail with `412 Precondition Failed`
    /// if the destination file already exists.
    ///
    /// Encoded as `header:<HEADER_NAME>:<HEADER_VALUE>` ignoring whitespace
    ///
    /// For example `header: cf-copy-destination-if-none-match: *`, would set
    /// the header `cf-copy-destination-if-none-match` to `*`
    ///
    /// [`CopyMode::Create`]: crate::CopyMode::Create
    Header(String, String),
    /// The same as [`S3CopyIfNotExists::Header`] but allows custom status code checking, for object stores that return values
    /// other than 412.
    ///
    /// Encoded as `header-with-status:<HEADER_NAME>:<HEADER_VALUE>:<STATUS>` ignoring whitespace
    HeaderWithStatus(String, String, reqwest::StatusCode),
    /// Native Amazon S3 supports copy if not exists through a multipart upload
    /// where the upload copies an existing object and is completed only if the
    /// new object does not already exist.
    ///
    /// WARNING: When using this mode, `copy_if_not_exists` does not copy tags
    /// or attributes from the source object.
    ///
    /// WARNING: When using this mode, `copy_if_not_exists` makes only a best
    /// effort attempt to clean up the multipart upload if the copy operation
    /// fails. Consider using a lifecycle rule to automatically clean up
    /// abandoned multipart uploads. See [the module
    /// docs](super#multipart-uploads) for details.
    ///
    /// Encoded as `multipart` ignoring whitespace.
62 | Multipart, 63 | } 64 | 65 | impl std::fmt::Display for S3CopyIfNotExists { 66 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 67 | match self { 68 | Self::Header(k, v) => write!(f, "header: {k}: {v}"), 69 | Self::HeaderWithStatus(k, v, code) => { 70 | write!(f, "header-with-status: {k}: {v}: {}", code.as_u16()) 71 | } 72 | Self::Multipart => f.write_str("multipart"), 73 | } 74 | } 75 | } 76 | 77 | impl S3CopyIfNotExists { 78 | fn from_str(s: &str) -> Option { 79 | if s.trim() == "multipart" { 80 | return Some(Self::Multipart); 81 | }; 82 | 83 | let (variant, value) = s.split_once(':')?; 84 | match variant.trim() { 85 | "header" => { 86 | let (k, v) = value.split_once(':')?; 87 | Some(Self::Header(k.trim().to_string(), v.trim().to_string())) 88 | } 89 | "header-with-status" => { 90 | let (k, v, status) = value.split(':').collect_tuple()?; 91 | 92 | let code = status.trim().parse().ok()?; 93 | 94 | Some(Self::HeaderWithStatus( 95 | k.trim().to_string(), 96 | v.trim().to_string(), 97 | code, 98 | )) 99 | } 100 | _ => None, 101 | } 102 | } 103 | } 104 | 105 | impl Parse for S3CopyIfNotExists { 106 | fn parse(v: &str) -> crate::Result { 107 | Self::from_str(v).ok_or_else(|| crate::Error::Generic { 108 | store: "Config", 109 | source: format!("Failed to parse \"{v}\" as S3CopyIfNotExists").into(), 110 | }) 111 | } 112 | } 113 | 114 | /// Configure how to provide conditional put support for [`AmazonS3`]. 
115 | /// 116 | /// [`AmazonS3`]: super::AmazonS3 117 | #[derive(Debug, Clone, Eq, PartialEq, Default)] 118 | #[allow(missing_copy_implementations)] 119 | #[non_exhaustive] 120 | pub enum S3ConditionalPut { 121 | /// Some S3-compatible stores, such as Cloudflare R2 and minio support conditional 122 | /// put using the standard [HTTP precondition] headers If-Match and If-None-Match 123 | /// 124 | /// Encoded as `etag` ignoring whitespace 125 | /// 126 | /// [HTTP precondition]: https://datatracker.ietf.org/doc/html/rfc9110#name-preconditions 127 | #[default] 128 | ETagMatch, 129 | 130 | /// Disable `conditional put` 131 | Disabled, 132 | } 133 | 134 | impl std::fmt::Display for S3ConditionalPut { 135 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 136 | match self { 137 | Self::ETagMatch => write!(f, "etag"), 138 | Self::Disabled => write!(f, "disabled"), 139 | } 140 | } 141 | } 142 | 143 | impl S3ConditionalPut { 144 | fn from_str(s: &str) -> Option { 145 | match s.trim() { 146 | "etag" => Some(Self::ETagMatch), 147 | "disabled" => Some(Self::Disabled), 148 | _ => None, 149 | } 150 | } 151 | } 152 | 153 | impl Parse for S3ConditionalPut { 154 | fn parse(v: &str) -> crate::Result { 155 | Self::from_str(v).ok_or_else(|| crate::Error::Generic { 156 | store: "Config", 157 | source: format!("Failed to parse \"{v}\" as S3PutConditional").into(), 158 | }) 159 | } 160 | } 161 | 162 | #[cfg(test)] 163 | mod tests { 164 | use super::S3CopyIfNotExists; 165 | 166 | #[test] 167 | fn parse_s3_copy_if_not_exists_header() { 168 | let input = "header: cf-copy-destination-if-none-match: *"; 169 | let expected = Some(S3CopyIfNotExists::Header( 170 | "cf-copy-destination-if-none-match".to_owned(), 171 | "*".to_owned(), 172 | )); 173 | 174 | assert_eq!(expected, S3CopyIfNotExists::from_str(input)); 175 | } 176 | 177 | #[test] 178 | fn parse_s3_copy_if_not_exists_header_with_status() { 179 | let input = "header-with-status:key:value:403"; 180 | let expected = 
Some(S3CopyIfNotExists::HeaderWithStatus( 181 | "key".to_owned(), 182 | "value".to_owned(), 183 | reqwest::StatusCode::FORBIDDEN, 184 | )); 185 | 186 | assert_eq!(expected, S3CopyIfNotExists::from_str(input)); 187 | } 188 | 189 | #[test] 190 | fn parse_s3_copy_if_not_exists_header_whitespace_invariant() { 191 | let expected = Some(S3CopyIfNotExists::Header( 192 | "cf-copy-destination-if-none-match".to_owned(), 193 | "*".to_owned(), 194 | )); 195 | 196 | const INPUTS: &[&str] = &[ 197 | "header:cf-copy-destination-if-none-match:*", 198 | "header: cf-copy-destination-if-none-match:*", 199 | "header: cf-copy-destination-if-none-match: *", 200 | "header : cf-copy-destination-if-none-match: *", 201 | "header : cf-copy-destination-if-none-match : *", 202 | "header : cf-copy-destination-if-none-match : * ", 203 | ]; 204 | 205 | for input in INPUTS { 206 | assert_eq!(expected, S3CopyIfNotExists::from_str(input)); 207 | } 208 | } 209 | 210 | #[test] 211 | fn parse_s3_copy_if_not_exists_header_with_status_whitespace_invariant() { 212 | let expected = Some(S3CopyIfNotExists::HeaderWithStatus( 213 | "key".to_owned(), 214 | "value".to_owned(), 215 | reqwest::StatusCode::FORBIDDEN, 216 | )); 217 | 218 | const INPUTS: &[&str] = &[ 219 | "header-with-status:key:value:403", 220 | "header-with-status: key:value:403", 221 | "header-with-status: key: value:403", 222 | "header-with-status: key: value: 403", 223 | "header-with-status : key: value: 403", 224 | "header-with-status : key : value: 403", 225 | "header-with-status : key : value : 403", 226 | "header-with-status : key : value : 403 ", 227 | ]; 228 | 229 | for input in INPUTS { 230 | assert_eq!(expected, S3CopyIfNotExists::from_str(input)); 231 | } 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /src/chunked.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more 
contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! A [`ChunkedStore`] that can be used to test streaming behaviour 19 | 20 | use std::fmt::{Debug, Display, Formatter}; 21 | use std::ops::Range; 22 | use std::sync::Arc; 23 | 24 | use async_trait::async_trait; 25 | use bytes::{BufMut, Bytes, BytesMut}; 26 | use futures::StreamExt; 27 | use futures::stream::BoxStream; 28 | 29 | use crate::path::Path; 30 | use crate::{ 31 | CopyOptions, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, 32 | ObjectStore, PutMultipartOptions, PutOptions, PutResult, RenameOptions, 33 | }; 34 | use crate::{PutPayload, Result}; 35 | 36 | /// Wraps a [`ObjectStore`] and makes its get response return chunks 37 | /// in a controllable manner. 38 | /// 39 | /// A `ChunkedStore` makes the memory consumption and performance of 40 | /// the wrapped [`ObjectStore`] worse. It is intended for use within 41 | /// tests, to control the chunks in the produced output streams. For 42 | /// example, it is used to verify the delimiting logic in 43 | /// newline_delimited_stream. 
#[derive(Debug)]
pub struct ChunkedStore {
    /// The wrapped store all operations delegate to
    inner: Arc<dyn ObjectStore>,
    chunk_size: usize, // chunks are in memory, so we use usize not u64
}

impl ChunkedStore {
    /// Creates a new [`ChunkedStore`] with the specified chunk_size
    pub fn new(inner: Arc<dyn ObjectStore>, chunk_size: usize) -> Self {
        Self { inner, chunk_size }
    }
}

impl Display for ChunkedStore {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "ChunkedStore({})", self.inner)
    }
}

#[async_trait]
#[deny(clippy::missing_trait_methods)]
impl ObjectStore for ChunkedStore {
    async fn put_opts(
        &self,
        location: &Path,
        payload: PutPayload,
        opts: PutOptions,
    ) -> Result<PutResult> {
        self.inner.put_opts(location, payload, opts).await
    }

    async fn put_multipart_opts(
        &self,
        location: &Path,
        opts: PutMultipartOptions,
    ) -> Result<Box<dyn MultipartUpload>> {
        self.inner.put_multipart_opts(location, opts).await
    }

    // Re-chunks the inner store's response stream into `chunk_size` pieces
    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
        let r = self.inner.get_opts(location, options).await?;
        let stream = match r.payload {
            #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
            GetResultPayload::File(file, path) => {
                crate::local::chunked_stream(file, path, r.range.clone(), self.chunk_size)
            }
            GetResultPayload::Stream(stream) => {
                let buffer = BytesMut::new();
                futures::stream::unfold(
                    (stream, buffer, false, self.chunk_size),
                    |(mut stream, mut buffer, mut exhausted, chunk_size)| async move {
                        // Keep accumulating bytes until we reach capacity as long as
                        // the stream can provide them:
                        if exhausted {
                            return None;
                        }
                        while buffer.len() < chunk_size {
                            match stream.next().await {
                                None => {
                                    exhausted = true;
                                    let slice = buffer.split_off(0).freeze();
                                    return Some((
                                        Ok(slice),
                                        (stream, buffer, exhausted,
chunk_size),
                                    ));
                                }
                                Some(Ok(bytes)) => {
                                    buffer.put(bytes);
                                }
                                Some(Err(e)) => {
                                    return Some((
                                        Err(crate::Error::Generic {
                                            store: "ChunkedStore",
                                            source: Box::new(e),
                                        }),
                                        (stream, buffer, exhausted, chunk_size),
                                    ));
                                }
                            };
                        }
                        // Return the chunked values as the next value in the stream
                        let slice = buffer.split_to(chunk_size).freeze();
                        Some((Ok(slice), (stream, buffer, exhausted, chunk_size)))
                    },
                )
                .boxed()
            }
        };
        Ok(GetResult {
            payload: GetResultPayload::Stream(stream),
            ..r
        })
    }

    async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
        self.inner.get_ranges(location, ranges).await
    }

    fn delete_stream(
        &self,
        locations: BoxStream<'static, Result<Path>>,
    ) -> BoxStream<'static, Result<Path>> {
        self.inner.delete_stream(locations)
    }

    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
        self.inner.list(prefix)
    }

    fn list_with_offset(
        &self,
        prefix: Option<&Path>,
        offset: &Path,
    ) -> BoxStream<'static, Result<ObjectMeta>> {
        self.inner.list_with_offset(prefix, offset)
    }

    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
        self.inner.list_with_delimiter(prefix).await
    }

    async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> {
        self.inner.copy_opts(from, to, options).await
    }

    async fn rename_opts(&self, from: &Path, to: &Path, options: RenameOptions) -> Result<()> {
        self.inner.rename_opts(from, to, options).await
    }
}

#[cfg(test)]
mod tests {
    use futures::StreamExt;

    use crate::ObjectStoreExt;
    #[cfg(feature = "fs")]
    use crate::integration::*;
    #[cfg(feature = "fs")]
    use
crate::local::LocalFileSystem;
    use crate::memory::InMemory;
    use crate::path::Path;

    use super::*;

    #[tokio::test]
    async fn test_chunked_basic() {
        let location = Path::parse("test").unwrap();
        let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
        store.put(&location, vec![0; 1001].into()).await.unwrap();

        for chunk_size in [10, 20, 31] {
            let store = ChunkedStore::new(Arc::clone(&store), chunk_size);
            let mut s = match store.get(&location).await.unwrap().payload {
                GetResultPayload::Stream(s) => s,
                _ => unreachable!(),
            };

            // Every chunk must be exactly chunk_size except the final remainder
            let mut remaining = 1001;
            while let Some(next) = s.next().await {
                let size = next.unwrap().len() as u64;
                let expected = remaining.min(chunk_size as u64);
                assert_eq!(size, expected);
                remaining -= expected;
            }
            assert_eq!(remaining, 0);
        }
    }

    #[cfg(feature = "fs")]
    #[tokio::test]
    async fn test_chunked() {
        let temporary = tempfile::tempdir().unwrap();
        let integrations: &[Arc<dyn ObjectStore>] = &[
            Arc::new(InMemory::new()),
            Arc::new(LocalFileSystem::new_with_prefix(temporary.path()).unwrap()),
        ];

        for integration in integrations {
            let integration = ChunkedStore::new(Arc::clone(integration), 100);

            put_get_delete_list(&integration).await;
            get_opts(&integration).await;
            list_uses_directories_correctly(&integration).await;
            list_with_delimiter(&integration).await;
            rename_and_copy(&integration).await;
            copy_if_not_exists(&integration).await;
            stream_get(&integration).await;
        }
    }
}
--------------------------------------------------------------------------------
/src/delimited.rs:
--------------------------------------------------------------------------------
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Utility for streaming newline delimited files from object storage 19 | 20 | use std::collections::VecDeque; 21 | 22 | use bytes::Bytes; 23 | use futures::{Stream, StreamExt}; 24 | 25 | use super::Result; 26 | 27 | #[derive(Debug, thiserror::Error)] 28 | enum Error { 29 | #[error("encountered unterminated string")] 30 | UnterminatedString, 31 | 32 | #[error("encountered trailing escape character")] 33 | TrailingEscape, 34 | } 35 | 36 | impl From for super::Error { 37 | fn from(err: Error) -> Self { 38 | Self::Generic { 39 | store: "LineDelimiter", 40 | source: Box::new(err), 41 | } 42 | } 43 | } 44 | 45 | /// The ASCII encoding of `"` 46 | const QUOTE: u8 = b'"'; 47 | 48 | /// The ASCII encoding of `\n` 49 | const NEWLINE: u8 = b'\n'; 50 | 51 | /// The ASCII encoding of `\` 52 | const ESCAPE: u8 = b'\\'; 53 | 54 | /// [`LineDelimiter`] is provided with a stream of [`Bytes`] and returns an iterator 55 | /// of [`Bytes`] containing a whole number of new line delimited records 56 | #[derive(Debug, Default)] 57 | struct LineDelimiter { 58 | /// Complete chunks of [`Bytes`] 59 | complete: VecDeque, 60 | /// Remainder bytes that form the next record 61 | remainder: Vec, 62 | /// True if the last character was the escape 
character 63 | is_escape: bool, 64 | /// True if currently processing a quoted string 65 | is_quote: bool, 66 | } 67 | 68 | impl LineDelimiter { 69 | /// Creates a new [`LineDelimiter`] with the provided delimiter 70 | fn new() -> Self { 71 | Self::default() 72 | } 73 | 74 | /// Adds the next set of [`Bytes`] 75 | fn push(&mut self, val: impl Into) { 76 | let val: Bytes = val.into(); 77 | 78 | let is_escape = &mut self.is_escape; 79 | let is_quote = &mut self.is_quote; 80 | let mut record_ends = val.iter().enumerate().filter_map(|(idx, v)| { 81 | if *is_escape { 82 | *is_escape = false; 83 | None 84 | } else if *v == ESCAPE { 85 | *is_escape = true; 86 | None 87 | } else if *v == QUOTE { 88 | *is_quote = !*is_quote; 89 | None 90 | } else if *is_quote { 91 | None 92 | } else { 93 | (*v == NEWLINE).then_some(idx + 1) 94 | } 95 | }); 96 | 97 | let start_offset = match self.remainder.is_empty() { 98 | true => 0, 99 | false => match record_ends.next() { 100 | Some(idx) => { 101 | self.remainder.extend_from_slice(&val[0..idx]); 102 | self.complete 103 | .push_back(Bytes::from(std::mem::take(&mut self.remainder))); 104 | idx 105 | } 106 | None => { 107 | self.remainder.extend_from_slice(&val); 108 | return; 109 | } 110 | }, 111 | }; 112 | let end_offset = record_ends.next_back().unwrap_or(start_offset); 113 | if start_offset != end_offset { 114 | self.complete.push_back(val.slice(start_offset..end_offset)); 115 | } 116 | 117 | if end_offset != val.len() { 118 | self.remainder.extend_from_slice(&val[end_offset..]) 119 | } 120 | } 121 | 122 | /// Marks the end of the stream, delimiting any remaining bytes 123 | /// 124 | /// Returns `true` if there is no remaining data to be read 125 | fn finish(&mut self) -> Result { 126 | if !self.remainder.is_empty() { 127 | if self.is_quote { 128 | Err(Error::UnterminatedString)?; 129 | } 130 | if self.is_escape { 131 | Err(Error::TrailingEscape)?; 132 | } 133 | 134 | self.complete 135 | .push_back(Bytes::from(std::mem::take(&mut 
self.remainder)))
        }
        Ok(self.complete.is_empty())
    }
}

impl Iterator for LineDelimiter {
    type Item = Bytes;

    fn next(&mut self) -> Option<Self::Item> {
        self.complete.pop_front()
    }
}

/// Given a [`Stream`] of [`Bytes`] returns a [`Stream`] where each
/// yielded [`Bytes`] contains a whole number of new line delimited records
/// accounting for `\` style escapes and `"` quotes
pub fn newline_delimited_stream<S>(s: S) -> impl Stream<Item = Result<Bytes>>
where
    S: Stream<Item = Result<Bytes>> + Unpin,
{
    let delimiter = LineDelimiter::new();

    futures::stream::unfold(
        (s, delimiter, false),
        |(mut s, mut delimiter, mut exhausted)| async move {
            loop {
                // Drain any complete records before pulling more input
                if let Some(next) = delimiter.next() {
                    return Some((Ok(next), (s, delimiter, exhausted)));
                } else if exhausted {
                    return None;
                }

                match s.next().await {
                    Some(Ok(bytes)) => delimiter.push(bytes),
                    Some(Err(e)) => return Some((Err(e), (s, delimiter, exhausted))),
                    None => {
                        exhausted = true;
                        match delimiter.finish() {
                            Ok(true) => return None,
                            Ok(false) => continue,
                            Err(e) => return Some((Err(e), (s, delimiter, exhausted))),
                        }
                    }
                }
            }
        },
    )
}

#[cfg(test)]
mod tests {
    use futures::stream::{BoxStream, TryStreamExt};

    use super::*;

    #[test]
    fn test_delimiter() {
        let mut delimiter = LineDelimiter::new();
        delimiter.push("hello\nworld");
        delimiter.push("\n\n");

        assert_eq!(delimiter.next().unwrap(), Bytes::from("hello\n"));
        assert_eq!(delimiter.next().unwrap(), Bytes::from("world\n"));
        assert_eq!(delimiter.next().unwrap(), Bytes::from("\n"));
        assert!(delimiter.next().is_none());
    }

    #[test]
    fn test_delimiter_escaped() {
        let mut delimiter = LineDelimiter::new();
        delimiter.push("");
delimiter.push("fo\\\n\"foo"); 208 | delimiter.push("bo\n\"bar\n"); 209 | delimiter.push("\"he"); 210 | delimiter.push("llo\"\n"); 211 | assert_eq!( 212 | delimiter.next().unwrap(), 213 | Bytes::from("fo\\\n\"foobo\n\"bar\n") 214 | ); 215 | assert_eq!(delimiter.next().unwrap(), Bytes::from("\"hello\"\n")); 216 | assert!(delimiter.next().is_none()); 217 | 218 | // Verify can push further data 219 | delimiter.push("\"foo\nbar\",\"fiz\\\"inner\\\"\"\nhello"); 220 | assert!(!delimiter.finish().unwrap()); 221 | 222 | assert_eq!( 223 | delimiter.next().unwrap(), 224 | Bytes::from("\"foo\nbar\",\"fiz\\\"inner\\\"\"\n") 225 | ); 226 | assert_eq!(delimiter.next().unwrap(), Bytes::from("hello")); 227 | assert!(delimiter.finish().unwrap()); 228 | assert!(delimiter.next().is_none()); 229 | } 230 | 231 | #[tokio::test] 232 | async fn test_delimiter_stream() { 233 | let input = vec!["hello\nworld\nbin", "go\ncup", "cakes"]; 234 | let input_stream = futures::stream::iter(input.into_iter().map(|s| Ok(Bytes::from(s)))); 235 | let stream = newline_delimited_stream(input_stream); 236 | 237 | let results: Vec<_> = stream.try_collect().await.unwrap(); 238 | assert_eq!( 239 | results, 240 | vec![ 241 | Bytes::from("hello\nworld\n"), 242 | Bytes::from("bingo\n"), 243 | Bytes::from("cupcakes") 244 | ] 245 | ) 246 | } 247 | #[tokio::test] 248 | async fn test_delimiter_unfold_stream() { 249 | let input_stream: BoxStream<'static, Result> = futures::stream::unfold( 250 | VecDeque::from(["hello\nworld\nbin", "go\ncup", "cakes"]), 251 | |mut input| async move { 252 | if !input.is_empty() { 253 | Some((Ok(Bytes::from(input.pop_front().unwrap())), input)) 254 | } else { 255 | None 256 | } 257 | }, 258 | ) 259 | .boxed(); 260 | let stream = newline_delimited_stream(input_stream); 261 | 262 | let results: Vec<_> = stream.try_collect().await.unwrap(); 263 | assert_eq!( 264 | results, 265 | vec![ 266 | Bytes::from("hello\nworld\n"), 267 | Bytes::from("bingo\n"), 268 | Bytes::from("cupcakes") 269 | ] 
270 | ) 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | --- 19 | name: CI 20 | 21 | concurrency: 22 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 23 | cancel-in-progress: true 24 | 25 | # trigger for all PRs that touch certain files and changes to main 26 | on: 27 | push: 28 | branches: 29 | - main 30 | pull_request: 31 | 32 | jobs: 33 | clippy: 34 | name: Clippy 35 | runs-on: ubuntu-latest 36 | container: 37 | image: amd64/rust 38 | steps: 39 | - uses: actions/checkout@v6 40 | - name: Setup Clippy 41 | run: rustup component add clippy 42 | # Run different tests for the library on its own as well as 43 | # all targets to ensure that it still works in the absence of 44 | # features that might be enabled by dev-dependencies of other 45 | # targets. 
46 | - name: Run clippy with default features 47 | run: cargo clippy -- -D warnings 48 | - name: Run clippy without default features 49 | run: cargo clippy --no-default-features -- -D warnings 50 | - name: Run clippy with fs features 51 | run: cargo clippy --no-default-features --features fs -- -D warnings 52 | - name: Run clippy with aws feature 53 | run: cargo clippy --features aws -- -D warnings 54 | - name: Run clippy with gcp feature 55 | run: cargo clippy --features gcp -- -D warnings 56 | - name: Run clippy with azure feature 57 | run: cargo clippy --features azure -- -D warnings 58 | - name: Run clippy with http feature 59 | run: cargo clippy --features http -- -D warnings 60 | - name: Run clippy with integration feature 61 | run: cargo clippy --no-default-features --features integration -- -D warnings 62 | - name: Run clippy with all features 63 | run: cargo clippy --all-features -- -D warnings 64 | - name: Run clippy with all features and all targets 65 | run: cargo clippy --all-features --all-targets -- -D warnings 66 | 67 | # test doc links still work 68 | docs: 69 | name: Rustdocs 70 | runs-on: ubuntu-latest 71 | env: 72 | RUSTDOCFLAGS: "-Dwarnings" 73 | steps: 74 | - uses: actions/checkout@v6 75 | - name: Run cargo doc 76 | run: cargo doc --document-private-items --no-deps --all-features 77 | 78 | # test the crate 79 | # This runs outside a container to workaround lack of support for passing arguments 80 | # to service containers - https://github.com/orgs/community/discussions/26688 81 | linux-test: 82 | name: Emulator Tests 83 | runs-on: ubuntu-latest 84 | env: 85 | # Disable full debug symbol generation to speed up CI build and keep memory down 86 | # "1" means line tables only, which is useful for panic tracebacks. 
87 | RUSTFLAGS: "-C debuginfo=1" 88 | RUST_BACKTRACE: "1" 89 | # Run integration tests 90 | TEST_INTEGRATION: 1 91 | EC2_METADATA_ENDPOINT: http://localhost:1338 92 | AZURE_CONTAINER_NAME: test-bucket 93 | AZURE_STORAGE_USE_EMULATOR: "1" 94 | AZURITE_BLOB_STORAGE_URL: "http://localhost:10000" 95 | AZURITE_QUEUE_STORAGE_URL: "http://localhost:10001" 96 | AWS_BUCKET: test-bucket 97 | AWS_DEFAULT_REGION: "us-east-1" 98 | AWS_ACCESS_KEY_ID: test 99 | AWS_SECRET_ACCESS_KEY: test 100 | AWS_ENDPOINT: http://localhost:4566 101 | AWS_ALLOW_HTTP: true 102 | AWS_COPY_IF_NOT_EXISTS: multipart 103 | AWS_CONDITIONAL_PUT: etag 104 | AWS_SERVER_SIDE_ENCRYPTION: aws:kms 105 | HTTP_URL: "http://localhost:8080" 106 | GOOGLE_BUCKET: test-bucket 107 | GOOGLE_SERVICE_ACCOUNT: "/tmp/gcs.json" 108 | 109 | steps: 110 | - uses: actions/checkout@v6 111 | 112 | # We are forced to use docker commands instead of service containers as we need to override the entrypoints 113 | # which is currently not supported - https://github.com/actions/runner/discussions/1872 114 | - name: Configure Fake GCS Server (GCP emulation) 115 | # Custom image - see fsouza/fake-gcs-server#1164 116 | run: | 117 | echo "GCS_CONTAINER=$(docker run -d -p 4443:4443 tustvold/fake-gcs-server -scheme http -backend memory -public-host localhost:4443)" >> $GITHUB_ENV 118 | # Give the container a moment to start up prior to configuring it 119 | sleep 1 120 | curl -v -X POST --data-binary '{"name":"test-bucket"}' -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b" 121 | echo '{"gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": "", "private_key_id": ""}' > "$GOOGLE_SERVICE_ACCOUNT" 122 | 123 | - name: Setup WebDav 124 | run: docker run -d -p 8080:80 rclone/rclone serve webdav /data --addr :80 125 | 126 | - name: Setup LocalStack (AWS emulation) 127 | run: | 128 | echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:4.11.1)" >> $GITHUB_ENV 
129 | echo "EC2_METADATA_CONTAINER=$(docker run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2)" >> $GITHUB_ENV 130 | aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket 131 | aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket-for-spawn 132 | aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket-for-checksum 133 | aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket-for-copy-if-not-exists 134 | aws --endpoint-url=http://localhost:4566 s3api create-bucket --bucket test-object-lock --object-lock-enabled-for-bucket 135 | 136 | KMS_KEY=$(aws --endpoint-url=http://localhost:4566 kms create-key --description "test key") 137 | echo "AWS_SSE_KMS_KEY_ID=$(echo $KMS_KEY | jq -r .KeyMetadata.KeyId)" >> $GITHUB_ENV 138 | 139 | - name: Configure Azurite (Azure emulation) 140 | # the magical connection string is from 141 | # https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=visual-studio#http-connection-strings 142 | run: | 143 | echo "AZURITE_CONTAINER=$(docker run -d -p 10000:10000 -p 10001:10001 -p 10002:10002 mcr.microsoft.com/azure-storage/azurite)" >> $GITHUB_ENV 144 | az storage container create -n test-bucket --connection-string 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;QueueEndpoint=http://localhost:10001/devstoreaccount1;' 145 | 146 | - name: Setup Rust toolchain 147 | run: | 148 | rustup toolchain install stable 149 | rustup default stable 150 | 151 | - name: Run object_store tests 152 | run: cargo test --features=aws,azure,gcp,http 153 | 154 | # Don't rerun doc tests (some of them rely on features other than aws) 155 | - name: Run object_store tests (AWS native conditional put) 156 | run: cargo test --lib --tests --features=aws 157 | env: 158 | AWS_CONDITIONAL_PUT: etag 159 | AWS_COPY_IF_NOT_EXISTS: multipart 
160 | 161 | - name: GCS Output 162 | if: ${{ !cancelled() }} 163 | run: docker logs $GCS_CONTAINER 164 | 165 | - name: LocalStack Output 166 | if: ${{ !cancelled() }} 167 | run: docker logs $LOCALSTACK_CONTAINER 168 | 169 | - name: EC2 Metadata Output 170 | if: ${{ !cancelled() }} 171 | run: docker logs $EC2_METADATA_CONTAINER 172 | 173 | - name: Azurite Output 174 | if: ${{ !cancelled() }} 175 | run: docker logs $AZURITE_CONTAINER 176 | 177 | # test the object_store crate builds against wasm32 in stable rust 178 | wasm32-build: 179 | name: Build wasm32 180 | runs-on: ubuntu-latest 181 | container: 182 | image: amd64/rust 183 | steps: 184 | - uses: actions/checkout@v6 185 | with: 186 | submodules: true 187 | - name: Install clang (needed for ring) 188 | run: apt-get update && apt-get install -y clang 189 | - name: Install wasm32-unknown-unknown 190 | run: rustup target add wasm32-unknown-unknown 191 | - name: Build wasm32-unknown-unknown 192 | run: cargo build --target wasm32-unknown-unknown 193 | - name: Install wasm32-wasip1 194 | run: rustup target add wasm32-wasip1 195 | - name: Build wasm32-wasip1 196 | run: cargo build --all-features --target wasm32-wasip1 197 | - name: Install wasm-pack 198 | run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh 199 | - uses: actions/setup-node@v6 200 | with: 201 | node-version: 20 202 | - name: Run wasm32-unknown-unknown tests (via Node) 203 | run: wasm-pack test --node --features http --no-default-features 204 | 205 | windows: 206 | name: cargo test LocalFileSystem (win64) 207 | runs-on: windows-latest 208 | steps: 209 | - uses: actions/checkout@v6 210 | with: 211 | submodules: true 212 | - name: Run LocalFileSystem tests 213 | run: cargo test local::tests 214 | -------------------------------------------------------------------------------- /src/prefix.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 
2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! An object store wrapper handling a constant path prefix 19 | use bytes::Bytes; 20 | use futures::{StreamExt, TryStreamExt, stream::BoxStream}; 21 | use std::ops::Range; 22 | 23 | use crate::path::Path; 24 | use crate::{ 25 | CopyOptions, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, 26 | PutMultipartOptions, PutOptions, PutPayload, PutResult, RenameOptions, Result, 27 | }; 28 | 29 | /// Store wrapper that applies a constant prefix to all paths handled by the store. 
30 | #[derive(Debug, Clone)] 31 | pub struct PrefixStore { 32 | prefix: Path, 33 | inner: T, 34 | } 35 | 36 | impl std::fmt::Display for PrefixStore { 37 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 38 | write!(f, "PrefixObjectStore({})", self.prefix.as_ref()) 39 | } 40 | } 41 | 42 | impl PrefixStore { 43 | /// Create a new instance of [`PrefixStore`] 44 | pub fn new(store: T, prefix: impl Into) -> Self { 45 | Self { 46 | prefix: prefix.into(), 47 | inner: store, 48 | } 49 | } 50 | 51 | /// Create the full path from a path relative to prefix 52 | fn full_path(&self, location: &Path) -> Path { 53 | full_path(&self.prefix, location) 54 | } 55 | 56 | /// Strip the constant prefix from a given path 57 | fn strip_prefix(&self, path: Path) -> Path { 58 | strip_prefix(&self.prefix, path) 59 | } 60 | 61 | /// Strip the constant prefix from a given ObjectMeta 62 | fn strip_meta(&self, meta: ObjectMeta) -> ObjectMeta { 63 | strip_meta(&self.prefix, meta) 64 | } 65 | } 66 | 67 | // Note: This is a relative hack to move these functions to pure functions so they don't rely 68 | // on the `self` lifetime. 
69 | 70 | /// Create the full path from a path relative to prefix 71 | fn full_path(prefix: &Path, path: &Path) -> Path { 72 | prefix.parts().chain(path.parts()).collect() 73 | } 74 | 75 | /// Strip the constant prefix from a given path 76 | fn strip_prefix(prefix: &Path, path: Path) -> Path { 77 | // Note cannot use match because of borrow checker 78 | if let Some(suffix) = path.prefix_match(prefix) { 79 | return suffix.collect(); 80 | } 81 | path 82 | } 83 | 84 | /// Strip the constant prefix from a given ObjectMeta 85 | fn strip_meta(prefix: &Path, meta: ObjectMeta) -> ObjectMeta { 86 | ObjectMeta { 87 | last_modified: meta.last_modified, 88 | size: meta.size, 89 | location: strip_prefix(prefix, meta.location), 90 | e_tag: meta.e_tag, 91 | version: None, 92 | } 93 | } 94 | 95 | #[async_trait::async_trait] 96 | #[deny(clippy::missing_trait_methods)] 97 | impl ObjectStore for PrefixStore { 98 | async fn put_opts( 99 | &self, 100 | location: &Path, 101 | payload: PutPayload, 102 | opts: PutOptions, 103 | ) -> Result { 104 | let full_path = self.full_path(location); 105 | self.inner.put_opts(&full_path, payload, opts).await 106 | } 107 | 108 | async fn put_multipart_opts( 109 | &self, 110 | location: &Path, 111 | opts: PutMultipartOptions, 112 | ) -> Result> { 113 | let full_path = self.full_path(location); 114 | self.inner.put_multipart_opts(&full_path, opts).await 115 | } 116 | 117 | async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { 118 | let full_path = self.full_path(location); 119 | self.inner.get_opts(&full_path, options).await 120 | } 121 | 122 | async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { 123 | let full_path = self.full_path(location); 124 | self.inner.get_ranges(&full_path, ranges).await 125 | } 126 | 127 | fn delete_stream( 128 | &self, 129 | locations: BoxStream<'static, Result>, 130 | ) -> BoxStream<'static, Result> { 131 | let prefix = self.prefix.clone(); 132 | let locations = locations 133 | 
.map(move |location| location.map(|loc| full_path(&prefix, &loc))) 134 | .boxed(); 135 | let prefix = self.prefix.clone(); 136 | self.inner 137 | .delete_stream(locations) 138 | .map(move |location| location.map(|loc| strip_prefix(&prefix, loc))) 139 | .boxed() 140 | } 141 | 142 | fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { 143 | let prefix = self.full_path(prefix.unwrap_or(&Path::default())); 144 | let s = self.inner.list(Some(&prefix)); 145 | let slf_prefix = self.prefix.clone(); 146 | s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed() 147 | } 148 | 149 | fn list_with_offset( 150 | &self, 151 | prefix: Option<&Path>, 152 | offset: &Path, 153 | ) -> BoxStream<'static, Result> { 154 | let offset = self.full_path(offset); 155 | let prefix = self.full_path(prefix.unwrap_or(&Path::default())); 156 | let s = self.inner.list_with_offset(Some(&prefix), &offset); 157 | let slf_prefix = self.prefix.clone(); 158 | s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed() 159 | } 160 | 161 | async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { 162 | let prefix = self.full_path(prefix.unwrap_or(&Path::default())); 163 | self.inner 164 | .list_with_delimiter(Some(&prefix)) 165 | .await 166 | .map(|lst| ListResult { 167 | common_prefixes: lst 168 | .common_prefixes 169 | .into_iter() 170 | .map(|p| self.strip_prefix(p)) 171 | .collect(), 172 | objects: lst 173 | .objects 174 | .into_iter() 175 | .map(|meta| self.strip_meta(meta)) 176 | .collect(), 177 | }) 178 | } 179 | 180 | async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> { 181 | let full_from = self.full_path(from); 182 | let full_to = self.full_path(to); 183 | self.inner.copy_opts(&full_from, &full_to, options).await 184 | } 185 | 186 | async fn rename_opts(&self, from: &Path, to: &Path, options: RenameOptions) -> Result<()> { 187 | let full_from = self.full_path(from); 188 | let full_to = self.full_path(to); 189 | 
self.inner.rename_opts(&full_from, &full_to, options).await 190 | } 191 | } 192 | 193 | #[cfg(not(target_arch = "wasm32"))] 194 | #[cfg(test)] 195 | mod tests { 196 | use std::slice; 197 | 198 | use super::*; 199 | use crate::local::LocalFileSystem; 200 | use crate::{ObjectStoreExt, integration::*}; 201 | 202 | use tempfile::TempDir; 203 | 204 | #[tokio::test] 205 | async fn prefix_test() { 206 | let root = TempDir::new().unwrap(); 207 | let inner = LocalFileSystem::new_with_prefix(root.path()).unwrap(); 208 | let integration = PrefixStore::new(inner, "prefix"); 209 | 210 | put_get_delete_list(&integration).await; 211 | get_opts(&integration).await; 212 | list_uses_directories_correctly(&integration).await; 213 | list_with_delimiter(&integration).await; 214 | rename_and_copy(&integration).await; 215 | copy_if_not_exists(&integration).await; 216 | stream_get(&integration).await; 217 | } 218 | 219 | #[tokio::test] 220 | async fn prefix_test_applies_prefix() { 221 | let tmpdir = TempDir::new().unwrap(); 222 | let local = LocalFileSystem::new_with_prefix(tmpdir.path()).unwrap(); 223 | 224 | let location = Path::from("prefix/test_file.json"); 225 | let data = Bytes::from("arbitrary data"); 226 | 227 | local.put(&location, data.clone().into()).await.unwrap(); 228 | 229 | let prefix = PrefixStore::new(local, "prefix"); 230 | let location_prefix = Path::from("test_file.json"); 231 | 232 | let content_list = flatten_list_stream(&prefix, None).await.unwrap(); 233 | assert_eq!(content_list, slice::from_ref(&location_prefix)); 234 | 235 | let root = Path::from("/"); 236 | let content_list = flatten_list_stream(&prefix, Some(&root)).await.unwrap(); 237 | assert_eq!(content_list, slice::from_ref(&location_prefix)); 238 | 239 | let read_data = prefix 240 | .get(&location_prefix) 241 | .await 242 | .unwrap() 243 | .bytes() 244 | .await 245 | .unwrap(); 246 | assert_eq!(&*read_data, data); 247 | 248 | let target_prefix = Path::from("/test_written.json"); 249 | prefix 250 | 
.put(&target_prefix, data.clone().into()) 251 | .await 252 | .unwrap(); 253 | 254 | prefix.delete(&location_prefix).await.unwrap(); 255 | 256 | let local = LocalFileSystem::new_with_prefix(tmpdir.path()).unwrap(); 257 | 258 | let err = local.get(&location).await.unwrap_err(); 259 | assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); 260 | 261 | let location = Path::from("prefix/test_written.json"); 262 | let read_data = local.get(&location).await.unwrap().bytes().await.unwrap(); 263 | assert_eq!(&*read_data, data) 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /src/payload.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use bytes::Bytes; 19 | use std::sync::Arc; 20 | 21 | /// A cheaply cloneable, ordered collection of [`Bytes`] 22 | #[derive(Debug, Clone)] 23 | pub struct PutPayload(Arc<[Bytes]>); 24 | 25 | impl Default for PutPayload { 26 | fn default() -> Self { 27 | Self(Arc::new([])) 28 | } 29 | } 30 | 31 | impl PutPayload { 32 | /// Create a new empty [`PutPayload`] 33 | pub fn new() -> Self { 34 | Self::default() 35 | } 36 | 37 | /// Creates a [`PutPayload`] from a static slice 38 | pub fn from_static(s: &'static [u8]) -> Self { 39 | s.into() 40 | } 41 | 42 | /// Creates a [`PutPayload`] from a [`Bytes`] 43 | pub fn from_bytes(s: Bytes) -> Self { 44 | s.into() 45 | } 46 | 47 | /// Returns the total length of the [`Bytes`] in this payload 48 | pub fn content_length(&self) -> usize { 49 | self.0.iter().map(|b| b.len()).sum() 50 | } 51 | 52 | /// Returns an iterator over the [`Bytes`] in this payload 53 | pub fn iter(&self) -> PutPayloadIter<'_> { 54 | PutPayloadIter(self.0.iter()) 55 | } 56 | } 57 | 58 | impl AsRef<[Bytes]> for PutPayload { 59 | fn as_ref(&self) -> &[Bytes] { 60 | self.0.as_ref() 61 | } 62 | } 63 | 64 | impl<'a> IntoIterator for &'a PutPayload { 65 | type Item = &'a Bytes; 66 | type IntoIter = PutPayloadIter<'a>; 67 | 68 | fn into_iter(self) -> Self::IntoIter { 69 | self.iter() 70 | } 71 | } 72 | 73 | impl IntoIterator for PutPayload { 74 | type Item = Bytes; 75 | type IntoIter = PutPayloadIntoIter; 76 | 77 | fn into_iter(self) -> Self::IntoIter { 78 | PutPayloadIntoIter { 79 | payload: self, 80 | idx: 0, 81 | } 82 | } 83 | } 84 | 85 | /// An iterator over [`PutPayload`] 86 | #[derive(Debug)] 87 | pub struct PutPayloadIter<'a>(std::slice::Iter<'a, Bytes>); 88 | 89 | impl<'a> Iterator for PutPayloadIter<'a> { 90 | type Item = &'a Bytes; 91 | 92 | fn next(&mut self) -> Option { 93 | self.0.next() 94 | } 95 | 96 | fn size_hint(&self) -> (usize, Option) { 97 | self.0.size_hint() 98 | } 99 | } 100 | 101 | /// An owning iterator of [`PutPayload`] 102 | 
#[derive(Debug)] 103 | pub struct PutPayloadIntoIter { 104 | payload: PutPayload, 105 | idx: usize, 106 | } 107 | 108 | impl Iterator for PutPayloadIntoIter { 109 | type Item = Bytes; 110 | 111 | fn next(&mut self) -> Option { 112 | let p = self.payload.0.get(self.idx)?.clone(); 113 | self.idx += 1; 114 | Some(p) 115 | } 116 | 117 | fn size_hint(&self) -> (usize, Option) { 118 | let l = self.payload.0.len() - self.idx; 119 | (l, Some(l)) 120 | } 121 | } 122 | 123 | impl From for PutPayload { 124 | fn from(value: Bytes) -> Self { 125 | Self(Arc::new([value])) 126 | } 127 | } 128 | 129 | impl From> for PutPayload { 130 | fn from(value: Vec) -> Self { 131 | Self(Arc::new([value.into()])) 132 | } 133 | } 134 | 135 | impl From<&'static str> for PutPayload { 136 | fn from(value: &'static str) -> Self { 137 | Bytes::from(value).into() 138 | } 139 | } 140 | 141 | impl From<&'static [u8]> for PutPayload { 142 | fn from(value: &'static [u8]) -> Self { 143 | Bytes::from(value).into() 144 | } 145 | } 146 | 147 | impl From for PutPayload { 148 | fn from(value: String) -> Self { 149 | Bytes::from(value).into() 150 | } 151 | } 152 | 153 | impl FromIterator for PutPayload { 154 | fn from_iter>(iter: T) -> Self { 155 | Bytes::from_iter(iter).into() 156 | } 157 | } 158 | 159 | impl FromIterator for PutPayload { 160 | fn from_iter>(iter: T) -> Self { 161 | Self(iter.into_iter().collect()) 162 | } 163 | } 164 | 165 | impl From for Bytes { 166 | fn from(value: PutPayload) -> Self { 167 | match value.0.len() { 168 | 0 => Self::new(), 169 | 1 => value.0[0].clone(), 170 | _ => { 171 | let mut buf = Vec::with_capacity(value.content_length()); 172 | value.iter().for_each(|x| buf.extend_from_slice(x)); 173 | buf.into() 174 | } 175 | } 176 | } 177 | } 178 | 179 | /// A builder for [`PutPayload`] that avoids reallocating memory 180 | /// 181 | /// Data is allocated in fixed blocks, which are flushed to [`Bytes`] once full. 
182 | /// Unlike [`Vec`] this avoids needing to repeatedly reallocate blocks of memory, 183 | /// which typically involves copying all the previously written data to a new 184 | /// contiguous memory region. 185 | #[derive(Debug)] 186 | pub struct PutPayloadMut { 187 | len: usize, 188 | completed: Vec, 189 | in_progress: Vec, 190 | block_size: usize, 191 | } 192 | 193 | impl Default for PutPayloadMut { 194 | fn default() -> Self { 195 | Self { 196 | len: 0, 197 | completed: vec![], 198 | in_progress: vec![], 199 | 200 | block_size: 8 * 1024, 201 | } 202 | } 203 | } 204 | 205 | impl PutPayloadMut { 206 | /// Create a new [`PutPayloadMut`] 207 | pub fn new() -> Self { 208 | Self::default() 209 | } 210 | 211 | /// Configures the minimum allocation size 212 | /// 213 | /// Defaults to 8KB 214 | pub fn with_block_size(self, block_size: usize) -> Self { 215 | Self { block_size, ..self } 216 | } 217 | 218 | /// Write bytes into this [`PutPayloadMut`] 219 | /// 220 | /// If there is an in-progress block, data will be first written to it, flushing 221 | /// it to [`Bytes`] once full. If data remains to be written, a new block of memory 222 | /// of at least the configured block size will be allocated, to hold the remaining data. 
223 | pub fn extend_from_slice(&mut self, slice: &[u8]) { 224 | let remaining = self.in_progress.capacity() - self.in_progress.len(); 225 | let to_copy = remaining.min(slice.len()); 226 | 227 | self.in_progress.extend_from_slice(&slice[..to_copy]); 228 | if self.in_progress.capacity() == self.in_progress.len() { 229 | let new_cap = self.block_size.max(slice.len() - to_copy); 230 | let completed = std::mem::replace(&mut self.in_progress, Vec::with_capacity(new_cap)); 231 | if !completed.is_empty() { 232 | self.completed.push(completed.into()) 233 | } 234 | self.in_progress.extend_from_slice(&slice[to_copy..]) 235 | } 236 | self.len += slice.len(); 237 | } 238 | 239 | /// Append a [`Bytes`] to this [`PutPayloadMut`] without copying 240 | /// 241 | /// This will close any currently buffered block populated by [`Self::extend_from_slice`], 242 | /// and append `bytes` to this payload without copying. 243 | pub fn push(&mut self, bytes: Bytes) { 244 | if !self.in_progress.is_empty() { 245 | let completed = std::mem::take(&mut self.in_progress); 246 | self.completed.push(completed.into()) 247 | } 248 | self.len += bytes.len(); 249 | self.completed.push(bytes); 250 | } 251 | 252 | /// Returns `true` if this [`PutPayloadMut`] contains no bytes 253 | #[inline] 254 | pub fn is_empty(&self) -> bool { 255 | self.len == 0 256 | } 257 | 258 | /// Returns the total length of the [`Bytes`] in this payload 259 | #[inline] 260 | pub fn content_length(&self) -> usize { 261 | self.len 262 | } 263 | 264 | /// Convert into [`PutPayload`] 265 | pub fn freeze(mut self) -> PutPayload { 266 | if !self.in_progress.is_empty() { 267 | let completed = std::mem::take(&mut self.in_progress).into(); 268 | self.completed.push(completed); 269 | } 270 | PutPayload(self.completed.into()) 271 | } 272 | } 273 | 274 | impl From for PutPayload { 275 | fn from(value: PutPayloadMut) -> Self { 276 | value.freeze() 277 | } 278 | } 279 | 280 | #[cfg(test)] 281 | mod test { 282 | use crate::PutPayloadMut; 283 | 
284 | #[test] 285 | fn test_put_payload() { 286 | let mut chunk = PutPayloadMut::new().with_block_size(23); 287 | chunk.extend_from_slice(&[1; 16]); 288 | chunk.extend_from_slice(&[2; 32]); 289 | chunk.extend_from_slice(&[2; 5]); 290 | chunk.extend_from_slice(&[2; 21]); 291 | chunk.extend_from_slice(&[2; 40]); 292 | chunk.extend_from_slice(&[0; 0]); 293 | chunk.push("foobar".into()); 294 | 295 | let payload = chunk.freeze(); 296 | assert_eq!(payload.content_length(), 120); 297 | 298 | let chunks = payload.as_ref(); 299 | assert_eq!(chunks.len(), 6); 300 | 301 | assert_eq!(chunks[0].len(), 23); 302 | assert_eq!(chunks[1].len(), 25); // 32 - (23 - 16) 303 | assert_eq!(chunks[2].len(), 23); 304 | assert_eq!(chunks[3].len(), 23); 305 | assert_eq!(chunks[4].len(), 20); 306 | assert_eq!(chunks[5].len(), 6); 307 | } 308 | 309 | #[test] 310 | fn test_content_length() { 311 | let mut chunk = PutPayloadMut::new(); 312 | chunk.push(vec![0; 23].into()); 313 | assert_eq!(chunk.content_length(), 23); 314 | chunk.extend_from_slice(&[0; 4]); 315 | assert_eq!(chunk.content_length(), 27); 316 | chunk.push(vec![0; 121].into()); 317 | assert_eq!(chunk.content_length(), 148); 318 | let payload = chunk.freeze(); 319 | assert_eq!(payload.content_length(), 148); 320 | } 321 | } 322 | --------------------------------------------------------------------------------