├── .github ├── renovate.json └── workflows │ ├── binaries.yml │ ├── release-plz.yml │ └── test_suite.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── Cargo.toml ├── Development.md ├── Dockerfile ├── Installation.md ├── LICENSE ├── README.md ├── Usage.md ├── ci ├── minio_start.sh └── minio_stop.sh ├── src ├── args.rs └── main.rs └── tests ├── files_on_localfs.rs └── files_on_s3.rs /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ], 5 | "packageRules": [ 6 | { 7 | "matchPackagePatterns": [ 8 | "*" 9 | ], 10 | "automerge": true 11 | } 12 | ], 13 | "dependencyDashboard": true 14 | } 15 | -------------------------------------------------------------------------------- /.github/workflows/binaries.yml: -------------------------------------------------------------------------------- 1 | name: "Publish binaries" 2 | 3 | permissions: 4 | contents: write 5 | 6 | on: 7 | release: 8 | types: [created] 9 | branches: [main] 10 | 11 | env: 12 | CARGO_INCREMENTAL: 0 13 | CARGO_NET_GIT_FETCH_WITH_CLI: true 14 | CARGO_NET_RETRY: 10 15 | CARGO_TERM_COLOR: always 16 | RUST_BACKTRACE: 1 17 | RUSTFLAGS: -D warnings 18 | RUSTUP_MAX_RETRIES: 10 19 | 20 | defaults: 21 | run: 22 | shell: bash 23 | 24 | jobs: 25 | upload-assets: 26 | name: ${{ matrix.target }} 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | matrix: 30 | include: 31 | - target: aarch64-unknown-linux-gnu 32 | os: ubuntu-22.04 33 | - target: aarch64-unknown-linux-musl 34 | os: ubuntu-22.04 35 | - target: aarch64-apple-darwin 36 | os: macos-12 37 | - target: aarch64-pc-windows-msvc 38 | os: windows-2022 39 | - target: x86_64-unknown-linux-gnu 40 | os: ubuntu-22.04 41 | - target: x86_64-unknown-linux-musl 42 | os: ubuntu-22.04 43 | - target: x86_64-apple-darwin 44 | os: macos-12 45 | - target: x86_64-pc-windows-msvc 46 | os: windows-2022 47 | - target: x86_64-unknown-freebsd 48 | os: ubuntu-22.04 49 | timeout-minutes: 60 50 | steps: 51 | - name: Checkout repository 52 | uses: actions/checkout@v4 53 | - name: Install Rust toolchain 54 | uses: dtolnay/rust-toolchain@stable 55 | - uses: taiki-e/setup-cross-toolchain-action@v1 56 | with: 57 | target: ${{ matrix.target }} 58 | if: startsWith(matrix.os, 'ubuntu') && !contains(matrix.target, '-musl') 59 | - uses: taiki-e/install-action@cross 60 | if: contains(matrix.target, '-musl') 61 | - run: echo "RUSTFLAGS=${RUSTFLAGS} -C target-feature=+crt-static" >> "${GITHUB_ENV}" 62 | if: endsWith(matrix.target, 'windows-msvc') 63 | - uses: taiki-e/upload-rust-binary-action@v1 64 | with: 65 | bin: qv 66 | target: ${{ matrix.target }} 67 | tar: all 68 | zip: windows 69 | token: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/release-plz.yml: -------------------------------------------------------------------------------- 1 | name: Release-plz 2 | 3 | permissions: 4 | pull-requests: write 5 | contents: write 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | workflow_dispatch: 12 | 13 | jobs: 14 | release-plz: 15 | name: Release-plz 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - name: Install Rust toolchain 23 | uses: dtolnay/rust-toolchain@stable 24 | - name: Run release-plz 25 | uses: MarcoIeni/release-plz-action@v0.5 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 28 | CARGO_REGISTRY_TOKEN: ${{ 
secrets.CARGO_REGISTRY_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/test_suite.yml: -------------------------------------------------------------------------------- 1 | name: "Test Suite" 2 | on: 3 | push: 4 | branches-ignore: 5 | - 'dependabot/**' #avoid duplicates: only run the PR, not the push 6 | - 'renovate/**' #avoid duplicates: only run the PR, not the push 7 | - 'release-plz**' #avoid duplicates: only run the PR, not the push 8 | - 'gh-pages' #github pages do not trigger all tests 9 | tags-ignore: 10 | - 'v*' #avoid rerun existing commit on release 11 | pull_request: 12 | branches: 13 | - 'main' 14 | workflow_dispatch: 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | test: 22 | if: (github.event_name != 'pull_request' && ! github.event.pull_request.head.repo.fork) || (github.event_name == 'pull_request' && (github.event.pull_request.head.repo.fork || startsWith(github.head_ref, 'dependabot/') || startsWith(github.head_ref, 'renovate/') || startsWith(github.head_ref, 'release-plz'))) 23 | name: cargo test 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | with: 28 | submodules: recursive 29 | 30 | - uses: actions-rust-lang/setup-rust-toolchain@v1 31 | with: 32 | toolchain: nightly 33 | components: llvm-tools-preview 34 | 35 | - name: Install grcov 36 | uses: taiki-e/install-action@v2 37 | with: 38 | tool: grcov 39 | 40 | - name: Build 41 | run: cargo build --all-targets 42 | env: 43 | RUSTFLAGS: "-Cinstrument-coverage" 44 | 45 | - name: Start minio 46 | run: ./ci/minio_start.sh 47 | 48 | - name: Test 49 | run: cargo test --all-features -- -Z unstable-options --format json --report-time | tee results.json 50 | env: 51 | LLVM_PROFILE_FILE: "target/coverage/prof/%p-%m.profraw" 52 | RUSTFLAGS: "-Cinstrument-coverage" 53 | 54 | - name: Stop minio 55 | run: ./ci/minio_stop.sh 56 | 57 | - name: Prepare junit report 58 | id: cargo_reporter 59 | uses: innoq/action-cargo-test-report@v1 60 | with: 61 | cargo-test-report-json: 'results.json' 62 | 63 | - name: Publish Test Report 64 | uses: mikepenz/action-junit-report@v5 65 | if: always() # always run even if the previous step fails 66 | with: 67 | check_name: Test Report 68 | fail_on_failure: true 69 | require_tests: true 70 | annotate_only: true 71 | summary: ${{ steps.cargo_reporter.outputs.summary }} 72 | 73 | - name: Create coverage report 74 | run: | 75 | grcov \ 76 | --source-dir . \ 77 | --binary-path target/debug \ 78 | --branch \ 79 | --excl-start 'mod tests \{' \ 80 | --ignore 'tests/*' \ 81 | -t lcov \ 82 | -o lcov.info \ 83 | target/coverage/prof 84 | 85 | - name: Upload coverage to Codecov 86 | uses: codecov/codecov-action@v5 87 | with: 88 | files: lcov.info 89 | fail_ci_if_error: false 90 | token: ${{ secrets.CODECOV_TOKEN }} 91 | 92 | # Check formatting with rustfmt 93 | formatting: 94 | if: (github.event_name != 'pull_request' && ! 
github.event.pull_request.head.repo.fork) || (github.event_name == 'pull_request' && (github.event.pull_request.head.repo.fork || startsWith(github.head_ref, 'dependabot/') || startsWith(github.head_ref, 'renovate/') || startsWith(github.ref, 'release-plz'))) 95 | name: cargo fmt 96 | runs-on: ubuntu-latest 97 | steps: 98 | - uses: actions/checkout@v4 99 | # Ensure rustfmt is installed and setup problem matcher 100 | - uses: actions-rust-lang/setup-rust-toolchain@v1 101 | with: 102 | components: rustfmt 103 | - name: Rustfmt Check 104 | uses: actions-rust-lang/rustfmt@v1 105 | 106 | lint: 107 | if: (github.event_name != 'pull_request' && ! github.event.pull_request.head.repo.fork) || (github.event_name == 'pull_request' && (github.event.pull_request.head.repo.fork || startsWith(github.head_ref, 'dependabot/') || startsWith(github.head_ref, 'renovate/') || startsWith(github.ref, 'release-plz'))) 108 | name: cargo clippy 109 | runs-on: ubuntu-latest 110 | steps: 111 | - uses: actions/checkout@v4 112 | - uses: actions-rust-lang/setup-rust-toolchain@v1 113 | - run: cargo clippy --all-features --all-targets --workspace -- -D warnings 114 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | 13 | # Added by cargo 14 | 15 | /target 16 | .idea 17 | 18 | ## Ignore instrumentation 19 | *.profraw 20 | lcov.info 21 | ccov.zip 22 | results.xml 23 | cobertura.xml 24 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | [submodule "testing"] 3 | path = testing 4 | url = https://github.com/timvw/arrow-testing 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/doublify/pre-commit-rust 3 | rev: v1.0 4 | hooks: 5 | - id: fmt 6 | - id: clippy 7 | - id: cargo-check -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
6 | 7 | ## [Unreleased] 8 | 9 | ## [0.9.6](https://github.com/timvw/qv/compare/v0.9.5...v0.9.6) - 2024-03-30 10 | 11 | ### Other 12 | - Merge branch 'main' into ci/attempt-to-build-binaries 13 | - attempt to build binaries on release 14 | 15 | ## [0.9.5](https://github.com/timvw/qv/compare/v0.9.4...v0.9.5) - 2024-03-30 16 | 17 | ### Added 18 | - add support for running on gcs 19 | - Add support for gcs back 20 | 21 | ### Other 22 | - allow creation of manual run 23 | - one more attempt to trigger builds 24 | - lint 25 | - remove ref to mod 26 | - remove more unused files 27 | - lint 28 | - remove unused files 29 | 30 | ## [0.9.4](https://github.com/timvw/qv/compare/v0.9.3...v0.9.4) - 2024-03-30 31 | 32 | ### Other 33 | - build and publish more binaries upon release 34 | 35 | ## [0.9.3](https://github.com/timvw/qv/compare/v0.9.2...v0.9.3) - 2024-03-30 36 | 37 | ### Other 38 | - attempt to triggers builds on release-plz mr/branch 39 | - split tests 40 | 41 | ## [0.9.2](https://github.com/timvw/qv/compare/v0.9.1...v0.9.2) - 2024-03-30 42 | 43 | ### Added 44 | - add test to verify that ndjson works 45 | 46 | ### Other 47 | - allow pr builds for release-plz 48 | - do not build with verbose flag 49 | - provide code-cov token 50 | - allow manual launch of test flow 51 | - fmt 52 | - add test to verify that gzipped json file is supported 53 | 54 | ## [0.9.1](https://github.com/timvw/qv/compare/v0.9.0...v0.9.1) - 2024-03-29 55 | 56 | ### Added 57 | - make changes such that a glue deltalake table can be loaded 58 | - add deltalake support again 59 | - add badges to readme 60 | - infer schema from glue catalog info 61 | - add support for listing on s3 as well 62 | - add support for listing files in a folder (also on s3) 63 | - add support for aws s3 console url 64 | - leverage rust aws sdk to get credentials 65 | - leverage opendal instead of object_store features 66 | 67 | ### Fixed 68 | - remove unwantend print 69 | - change expected output 70 | - add missing region for test 71 | 72 | ### Other 73 | - lint 74 | - *(deps)* update codecov/codecov-action action to v4 ([#89](https://github.com/timvw/qv/pull/89)) 75 | - attempt to add codecoverage 76 | - attempt to trigger test run only once 77 | - only annotate tests results 78 | - do not group prs 79 | - specify versions 80 | - use nightly 81 | - attempt to capture test results and upload them 82 | - revert to datafusion 35 such that we can add the deltalake crate 83 | - move things around 84 | - *(deps)* update rust docker tag to v1.77 ([#69](https://github.com/timvw/qv/pull/69)) 85 | - add test to verify that s3 console url works 86 | - improve the way we build expected output 87 | - fmt 88 | - more documentation on how aws s3 profiles work 89 | - changes for gcs introduction 90 | - add entry on s3 creds 91 | - add entry on s3 creds 92 | - trim expected output 93 | - lint 94 | - added entry on releases 95 | - updated developer instructions 96 | - start/stop minio before/after tests 97 | - remove unused files 98 | - change to tokio 1 to have latest 99 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "qv" 3 | description = "quickly view your data" 4 | version = "0.9.6" 5 | edition = "2021" 6 | homepage = "https://github.com/timvw/qv" 7 | repository = "https://github.com/timvw/qv" 8 | readme = "README.md" 9 | authors = ["Tim Van Wassenhove "] 10 | license = "Apache-2.0" 11 | keywords = [ "quickview", 
"data", "query", "sql", "datafusion" ] 12 | exclude = [ 13 | ".github/*", 14 | "ci/*", 15 | "dev/*", 16 | "testing/*", 17 | ] 18 | 19 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 20 | 21 | [dependencies] 22 | aws-config = "1.2.1" 23 | aws-sdk-glue = "1.27" 24 | aws-types = "1.2" 25 | aws-credential-types = "1.2" 26 | chrono = "0.4.38" 27 | clap = { version = "4.5.4", features = ["derive"] } 28 | #datafusion = { version = "44", features = ["avro"] } 29 | datafusion = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d", features = ["avro"]} 30 | deltalake = { git = "https://github.com/delta-io/delta-rs.git", rev = "rust-v0.23.0", default-features = false, features = ["datafusion-ext", "s3", "gcs"] } 31 | futures = "0.3" 32 | glob = "0.3" 33 | object_store = { version = "0.11", features = ["aws", "gcp"] } 34 | regex = "1.10" 35 | tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } 36 | url = "2.5" 37 | 38 | [patch.crates-io] 39 | datafusion = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d", features = ["avro"]} 40 | datafusion-expr = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d"} 41 | datafusion-common = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d", features = ["avro"]} 42 | datafusion-proto = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d"} 43 | datafusion-sql = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d"} 44 | datafusion-physical-expr = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d"} 45 | datafusion-physical-plan = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d"} 46 | datafusion-functions = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d"} 47 | datafusion-functions-aggregate = { git = "https://github.com/timvw/datafusion.git", rev = "6e7c5467510636d97ee9ecf498cd54d7719f031d"} 48 | 49 | [dev-dependencies] 50 | assert_cmd = "2.0.14" 51 | predicates = "3.1" 52 | -------------------------------------------------------------------------------- /Development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | ## Standard rust toolchain: 4 | 5 | Uses the familiar cargo targets: build, test, fmt, clippy 6 | 7 | ## Testing 8 | 9 | ```bash 10 | ./ci/minio_start.sh 11 | cargo test 12 | ./ci/minio_stop.sh 13 | ``` 14 | 15 | ## Releasing 16 | 17 | Leverages [Release-plz](https://github.com/MarcoIeni/release-plz) to build Release PR's. 18 | 19 | 20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | FROM rust:1.83 as builder 19 | 20 | WORKDIR /usr/src/qv 21 | COPY ./Cargo.toml ./Cargo.toml 22 | ## Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 23 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 24 | #COPY ./Cargo.lock ./Cargo.lock 25 | COPY ./src ./src 26 | 27 | RUN rustup component add rustfmt 28 | RUN cargo build --release 29 | 30 | FROM debian:bullseye-slim 31 | RUN apt-get update 32 | RUN apt-get install --no-install-recommends -y ca-certificates 33 | #RUN apt-get install -y extra-runtime-dependencies 34 | #RUN rm -rf /var/lib/apt/lists/* 35 | COPY --from=builder /usr/src/qv/target/release/qv /usr/local/bin 36 | 37 | ENTRYPOINT ["qv"] -------------------------------------------------------------------------------- /Installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## As a [Homebrew](https://brew.sh/) package 4 | 5 | ```bash 6 | brew tap timvw/tap 7 | brew install qv 8 | ``` 9 | 10 | ## Download a binary from [Github Release](https://github.com/timvw/qv/releases/latest) 11 | ```bash 12 | wget https://github.com/timvw/qv/releases/download/v0.4.0/qv-0.4.0-x86_64-apple-darwin-generic.tar.gz 13 | tar -zxf qv-0.4.0-x86_64-apple-darwin-generic.tar.gz 14 | ``` 15 | 16 | ## Run as a [container](https://github.com/timvw/qv/pkgs/container/qv) image 17 | 18 | ```bash 19 | docker run --rm -it -v $HOME/.aws:/root/.aws -e AWS_PROFILE=icteam ghcr.io/timvw/qv:0.4.0 s3://datafusion-testing/data/avro/alltypes_plain.avro 20 | ``` 21 | 22 | ## Via rust toolchain 23 | 24 | ```bash 25 | cargo install --git https://github.com/timvw/qv --tag v0.4.0 26 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quickly view your data (qv) 2 | 3 | [github](https://github.com/timvw/qv) 4 | [crates.io](https://crates.io/crates/qv) 5 | [docs.rs](https://docs.rs/qv) 6 | [build status](https://github.com/timvw/qv/actions?query=branch%3Amain) 7 | 8 | A simply CLI to quickly view your data. Powered by [DataFusion](https://github.com/apache/arrow-datafusion). 9 | 10 | ## Example: View data on local filesystem 11 | 12 | ```bash 13 | qv /mnt/datasets/nyc/green_tripdata_2020-07.csv 14 | ``` 15 | 16 | Example output: 17 | 18 | ``` 19 | +----------+----------------------+-----------------------+--------------------+------------+--------------+--------------+-----------------+---------------+-------------+-------+---------+------------+--------------+-----------+-----------------------+--------------+--------------+-----------+----------------------+ 20 | | VendorID | lpep_pickup_datetime | lpep_dropoff_datetime | store_and_fwd_flag | RatecodeID | PULocationID | DOLocationID | passenger_count | trip_distance | fare_amount | extra | mta_tax | tip_amount | tolls_amount | ehail_fee | improvement_surcharge | total_amount | payment_type | trip_type | congestion_surcharge | 21 | +----------+----------------------+-----------------------+--------------------+------------+--------------+--------------+-----------------+---------------+-------------+-------+---------+------------+--------------+-----------+-----------------------+--------------+--------------+-----------+----------------------+ 22 | | 2 | 2020-07-01 00:05:18 | 2020-07-01 00:22:07 | N | 1 | 134 | 35 | 2 | 6.38 | 20.5 | 0.5 | 0.5 | 0 | 0 | | 0.3 | 21.8 | 2 | 1 | 0 | 23 | | 2 | 2020-07-01 00:47:06 | 2020-07-01 00:52:13 | N | 1 | 41 | 42 | 1 | 1.06 | 6 | 0.5 | 0.5 | 1.46 | 0 | | 0.3 | 8.76 | 1 | 1 | 0 | 24 | | 2 | 2020-07-01 00:24:59 | 2020-07-01 00:35:18 | N | 1 | 42 | 159 | 1 | 2.1 | 9 | 0.5 | 0.5 | 0 | 0 | | 0.3 | 10.3 | 2 | 1 | 0 | 25 | | 2 | 2020-07-01 00:55:12 | 2020-07-01 00:58:45 | N | 1 | 116 | 116 | 1 | 0.7 | 5 | 0.5 | 0.5 | 0 | 0 | | 0.3 | 6.3 | 2 | 1 | 0 | 26 | | 2 | 2020-07-01 00:12:36 | 2020-07-01 00:20:14 | N | 1 | 43 | 141 | 1 | 1.84 | 8 | 0.5 | 0.5 | 0 | 0 | | 0.3 | 12.05 | 2 | 1 | 2.75 | 27 | | 2 | 2020-07-01 00:30:55 | 2020-07-01 00:37:05 | N | 5 | 74 | 262 | 1 | 2.04 | 27 | 0 | 0 | 0 | 0 | | 0.3 | 30.05 | 2 | 1 | 2.75 | 28 | | 2 | 2020-07-01 00:13:00 | 2020-07-01 00:19:09 | N | 1 | 159 | 119 | 1 | 1.35 | 6.5 | 0.5 | 0.5 | 0 | 0 | | 0.3 | 7.8 | 2 | 1 | 0 | 29 | | 2 | 2020-07-01 00:39:09 | 2020-07-01 00:40:55 | N | 1 | 75 | 75 | 1 | 0.35 | -3.5 | -0.5 | -0.5 | 0 | 0 | | -0.3 | -4.8 | 4 | 1 | 0 | 30 | | 2 | 2020-07-01 00:39:09 | 2020-07-01 00:40:55 | N | 1 | 75 | 75 | 1 | 0.35 | 3.5 | 0.5 | 0.5 | 0 | 0 | | 0.3 | 4.8 | 2 | 1 | 0 | 31 | | 2 | 2020-07-01 00:45:59 | 2020-07-01 01:01:02 | N | 1 | 75 | 87 | 1 | 8.17 | 24 | 0.5 | 0.5 | 4.21 | 0 | | 0.3 | 32.26 | 1 | 1 | 2.75 | 32 | +----------+----------------------+-----------------------+--------------------+------------+--------------+--------------+-----------------+---------------+-------------+-------+---------+------------+--------------+-----------+-----------------------+--------------+--------------+-----------+----------------------+ 33 | ``` 34 | 35 | For more examples consult our [Usage examples](Usage.md). 
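A few more invocations against the same file, using the flags documented in the [Usage examples](Usage.md); the query and column names below are purely illustrative (taken from the sample output above):

```bash
# run a SQL query against the file; the data is always registered as the table `tbl`
qv /mnt/datasets/nyc/green_tripdata_2020-07.csv -q 'select VendorID, trip_distance from tbl'

# show the schema instead of the data
qv /mnt/datasets/nyc/green_tripdata_2020-07.csv -s

# return 100 rows instead of the default 10
qv /mnt/datasets/nyc/green_tripdata_2020-07.csv -l 100
```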
36 | 37 | ## Features 38 | 39 | * View file (and directories of files) contents 40 | * Run SQL against files 41 | * View file schemas 42 | * Supported formats: 43 | - [Deltalake](https://delta.io/) 44 | - [Parquet](https://parquet.apache.org/) 45 | - [Avro](https://avro.apache.org/) 46 | - [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) 47 | - [NDJSON](http://ndjson.org/) 48 | * Supported storage systems: 49 | - local file system 50 | - [S3](https://aws.amazon.com/s3/) (+ https links from AWS S3 console) 51 | - [GCS](https://cloud.google.com/storage) 52 | 53 | ## Installation 54 | Read the [Installation instructions](Installation.md). 55 | 56 | ## Development 57 | Read the [Development instructions](Development.md). 58 | 59 | 60 | -------------------------------------------------------------------------------- /Usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | ## Run query on data 4 | 5 | ```bash 6 | qv s3://tpc-h-parquet/1/customer -q 'select c_custkey, UPPER(c_name) from tbl' 7 | ``` 8 | 9 | ## View schema of data 10 | 11 | ```bash 12 | qv ./datasets/tpc-h-parquet/1/customer -s 13 | ``` 14 | 15 | ## View data on GCS 16 | 17 | ### Configuration 18 | 19 | qv expects the environment variable GOOGLE_APPLICATION_CREDENTIALS to exist and point to a file which contains Google service account credentials. 20 | 21 | ```bash 22 | qv gs://datafusion-delta-testing/data/delta/COVID-19_NYT 23 | ``` 24 | 25 | ## View data on S3 26 | 27 | ### Configuration 28 | 29 | Usually [Credential](https://github.com/awslabs/aws-sdk-rust/blob/main/sdk/aws-config/src/default_provider/credentials.rs#L25) loading works out of the box when using the [AWS SDK for Rust](https://github.com/awslabs/aws-sdk-rust/tree/main). 30 | 31 | The following environment variables are needed for credentials: 32 | 33 | * AWS_REGION 34 | * AWS_ACCESS_KEY_ID 35 | * AWS_SECRET_ACCESS_KEY 36 | 37 | In case you have AWS SSO credentials you need to set the following: 38 | * AWS_PROFILE 39 | 40 | In case you have a custom endpoint in place (eg: [minio](https://min.io/)) you also need to set: 41 | * AWS_ENDPOINT_URL 42 | * AWS_ENDPOINT 43 | * AWS_ALLOW_HTTP 44 | See https://docs.rs/object_store/latest/object_store/aws/struct.AmazonS3Builder.html for the options recognized by the underlying object store. 45 | 46 | 47 | 48 | ```bash 49 | qv s3://tpc-h-parquet/1/customer 50 | ``` 51 | 52 | ## Specify AWS (SSO) profile to use 53 | 54 | ```bash 55 | qv s3://tpc-h-parquet/1/customer --profile my-user 56 | ``` 57 | 58 | This is the same as: 59 | 60 | ```bash 61 | AWS_PROFILE=my-user qv s3://tpc-h-parquet/1/customer 62 | ``` 63 | 64 | ## View data from S3 console URL 65 | 66 | ```bash 67 | qv https://s3.console.aws.amazon.com/s3/buckets/datafusion-delta-testing?region=eu-central-1&prefix=simple_table/&showversions=false 68 | ``` 69 | 70 | ## View data which matches a globbing pattern 71 | 72 | ```bash 73 | qv "s3://datafusion-parquet-testing/data/alltypes_pla*n.parquet" 74 | ``` 75 | 76 | ## View delta table (no need for a manifest) 77 | 78 | ```bash 79 | qv /Users/timvw/src/github/delta-rs/rust/tests/data/COVID-19_NYT 80 | ``` 81 | 82 | ## View delta table at specific point in time 83 | 84 | ```bash 85 | qv /Users/timvw/src/github/delta-rs/rust/tests/data/COVID-19_NYT --at "2022-01-01T16:39:00+01:00" 86 | ``` 87 | 88 | ## View glue table 89 | 90 | ```bash 91 | qv glue://mydb.table1 92 | ``` -------------------------------------------------------------------------------- /ci/minio_start.sh:
-------------------------------------------------------------------------------- 1 | docker run \ 2 | --detach \ 3 | --name minio-qv \ 4 | --rm \ 5 | --publish 9000:9000 \ 6 | --publish 9001:9001 \ 7 | --volume "$PWD/testing:/data" \ 8 | --env "MINIO_ROOT_USER=AKIAIOSFODNN7EXAMPLE" \ 9 | --env "MINIO_ROOT_PASSWORD=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" \ 10 | quay.io/minio/minio:RELEASE.2022-05-26T05-48-41Z server /data \ 11 | --console-address ":9001" -------------------------------------------------------------------------------- /ci/minio_stop.sh: -------------------------------------------------------------------------------- 1 | docker stop minio-qv -------------------------------------------------------------------------------- /src/args.rs: -------------------------------------------------------------------------------- 1 | use chrono::{DateTime, Utc}; 2 | use clap::Parser; 3 | 4 | #[derive(Parser, Debug)] 5 | #[clap(author, version, about, long_about = None)] 6 | pub struct Args { 7 | /// Location where the data is located 8 | pub path: String, 9 | 10 | /// Query to execute 11 | #[clap(short, long, default_value_t = String::from("select * from tbl"), group = "sql")] 12 | pub query: String, 13 | 14 | /// When provided the schema is shown 15 | #[clap(short, long, group = "sql")] 16 | pub schema: bool, 17 | 18 | /// Rows to return 19 | #[clap(short, long, default_value_t = 10)] 20 | pub limit: usize, 21 | 22 | /// Optional AWS Profile to use 23 | #[clap(short, long)] 24 | pub profile: Option<String>, 25 | 26 | /// Optional timestamp for delta table 27 | #[clap( 28 | short, 29 | long, 30 | help = "Timestamp to load deltatable in RFC format, eg: 2022-01-13T16:39:00+01:00" 31 | )] 32 | pub at: Option<DateTime<Utc>>, 33 | } 34 | 35 | impl Args { 36 | pub fn get_query(&self) -> &str { 37 | let query = if self.schema { 38 | "SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'tbl'" 39 | } else { 40 | self.query.as_str() 41 | }; 42 | query 43 | } 44 | 45 | /* 46 | pub async fn get_globbing_path(&self) -> Result<GlobbingPath> { 47 | let (data_location, maybe_sdk_config) = match update_s3_console_url(&self.path) { 48 | (true, updated_location) => (updated_location, Some(get_sdk_config(self).await)), 49 | (false, location) => (location, None), 50 | }; 51 | 52 | let data_location = match parse_glue_url(&data_location) { 53 | // When the provided s looks like glue://database.table we lookup the storage location 54 | // When the provided s does not look like glue://database.table, return s as is.
55 | Some((database_name, table_name)) => { 56 | let sdk_config = match maybe_sdk_config { 57 | Some(sdk_config) => sdk_config, 58 | None => get_sdk_config(self).await, 59 | }; 60 | 61 | get_storage_location(&sdk_config, &database_name, &table_name) 62 | .await 63 | .unwrap_or_else(|_| { 64 | panic!( 65 | "failed to get storage location for {}.{}", 66 | database_name, table_name 67 | ) 68 | }) 69 | } 70 | None => data_location, 71 | }; 72 | 73 | let globbing_path = GlobbingPath::parse(&data_location)?; 74 | Ok(globbing_path) 75 | }*/ 76 | } 77 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::env; 3 | use std::sync::Arc; 4 | 5 | use aws_config::BehaviorVersion; 6 | use aws_credential_types::provider::ProvideCredentials; 7 | use aws_sdk_glue::types::{StorageDescriptor, Table}; 8 | use aws_sdk_glue::Client; 9 | use aws_types::SdkConfig; 10 | use clap::Parser; 11 | use datafusion::common::{DataFusionError, Result}; 12 | use datafusion::datasource::file_format::avro::AvroFormat; 13 | use datafusion::datasource::file_format::csv::CsvFormat; 14 | use datafusion::datasource::file_format::json::JsonFormat; 15 | use datafusion::datasource::file_format::parquet::ParquetFormat; 16 | use datafusion::datasource::file_format::FileFormat; 17 | use datafusion::datasource::listing::{ 18 | ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, 19 | }; 20 | use datafusion::datasource::TableProvider; 21 | use datafusion::prelude::*; 22 | use datafusion::sql::TableReference; 23 | use deltalake::open_table; 24 | use object_store::aws::{AmazonS3, AmazonS3Builder}; 25 | use object_store::gcp::{GoogleCloudStorage, GoogleCloudStorageBuilder}; 26 | use object_store::path::Path; 27 | use object_store::ObjectStore; 28 | use regex::Regex; 29 | use url::Url; 30 | 31 | use crate::args::Args; 32 | 33 | mod args; 34 | 35 | #[tokio::main] 36 | async fn main() -> Result<()> { 37 | let config = SessionConfig::new().with_information_schema(true); 38 | let ctx = SessionContext::new_with_config(config); 39 | 40 | let args: Args = Args::parse(); 41 | 42 | let (_, data_path) = replace_s3_console_url_with_s3_path(&args.path.clone()); 43 | 44 | let sdk_config = get_sdk_config(&args).await; 45 | 46 | let (data_path, file_format) = replace_glue_table_with_path(&data_path, &sdk_config).await?; 47 | 48 | let data_path = if data_path.starts_with("s3://") { 49 | // register s3 object store 50 | let s3_url = Url::parse(&data_path) 51 | .map_err(|e| DataFusionError::Execution(format!("Failed to parse url, {e}")))?; 52 | let s3 = build_s3(&s3_url, &sdk_config).await?; 53 | let s3_arc = Arc::new(s3); 54 | ctx.runtime_env() 55 | .register_object_store(&s3_url, s3_arc.clone()); 56 | 57 | deltalake::aws::register_handlers(None); 58 | 59 | // add trailing slash to folder 60 | if !data_path.ends_with('/') { 61 | let path = Path::parse(s3_url.path())?; 62 | if s3_arc.head(&path).await.is_err() { 63 | format!("{data_path}/") 64 | } else { 65 | data_path 66 | } 67 | } else { 68 | data_path 69 | } 70 | } else { 71 | data_path 72 | }; 73 | 74 | let data_path = if data_path.starts_with("gs://") || data_path.starts_with("gcs://") { 75 | let gcs_url = Url::parse(&data_path) 76 | .map_err(|e| DataFusionError::Execution(format!("Failed to parse url, {e}")))?; 77 | let gcs = build_gcs(&gcs_url).await?; 78 | let gcs_arc = Arc::new(gcs); 79 | ctx.runtime_env() 80 | 
.register_object_store(&gcs_url, gcs_arc.clone()); 81 | 82 | deltalake::gcp::register_handlers(None); 83 | 84 | // add trailing slash to folder 85 | if !data_path.ends_with('/') { 86 | let path = Path::parse(gcs_url.path())?; 87 | if gcs_arc.head(&path).await.is_err() { 88 | format!("{data_path}/") 89 | } else { 90 | data_path 91 | } 92 | } else { 93 | data_path 94 | } 95 | } else { 96 | data_path 97 | }; 98 | 99 | let table: Arc = if let Ok(mut delta_table) = open_table(&data_path).await { 100 | if let Some(at) = args.at { 101 | delta_table.load_with_datetime(at).await?; 102 | } 103 | Arc::new(delta_table) 104 | } else { 105 | let table_path = ListingTableUrl::parse(&data_path)?; 106 | let mut config = ListingTableConfig::new(table_path); 107 | 108 | config = if let Some(format) = file_format { 109 | config.with_listing_options(ListingOptions::new(format)) 110 | } else { 111 | config.infer_options(&ctx.state()).await? 112 | }; 113 | 114 | config = config.infer_schema(&ctx.state()).await?; 115 | let table = ListingTable::try_new(config)?; 116 | Arc::new(table) 117 | }; 118 | 119 | ctx.register_table(TableReference::from("datafusion.public.tbl"), table)?; 120 | 121 | let query = &args.get_query(); 122 | let df = ctx.sql(query).await?; 123 | if args.schema { 124 | df.show().await?; 125 | } else { 126 | df.show_limit(args.limit).await?; 127 | } 128 | 129 | Ok(()) 130 | } 131 | 132 | async fn get_sdk_config(args: &Args) -> SdkConfig { 133 | set_aws_profile_when_needed(args); 134 | set_aws_region_when_needed(); 135 | 136 | aws_config::load_defaults(BehaviorVersion::latest()).await 137 | } 138 | 139 | fn set_aws_profile_when_needed(args: &Args) { 140 | if let Some(aws_profile) = &args.profile { 141 | env::set_var("AWS_PROFILE", aws_profile); 142 | } 143 | } 144 | 145 | fn set_aws_region_when_needed() { 146 | match env::var("AWS_DEFAULT_REGION") { 147 | Ok(_) => {} 148 | Err(_) => env::set_var("AWS_DEFAULT_REGION", "eu-central-1"), 149 | } 150 | } 151 | 152 | /// When the provided s looks like an https url from the amazon webui convert it to an s3:// url 153 | /// When the provided s does not like such url, return it as is. 
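/// (Illustrative mapping, mirroring the unit test below:
///  "https://s3.console.aws.amazon.com/s3/buckets/datafusion-delta-testing?region=eu-central-1&prefix=COVID-19_NYT/&showversions=false"
///  becomes (true, "s3://datafusion-delta-testing/COVID-19_NYT/"),
///  while a plain local path is returned unchanged as (false, path).)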
154 | fn replace_s3_console_url_with_s3_path(s: &str) -> (bool, String) { 155 | if s.starts_with("https://s3.console.aws.amazon.com/s3/buckets/") { 156 | let parsed_url = Url::parse(s).unwrap_or_else(|_| panic!("Failed to parse {}", s)); 157 | let path_segments = parsed_url 158 | .path_segments() 159 | .map(|c| c.collect::>()) 160 | .unwrap_or_default(); 161 | if path_segments.len() == 3 { 162 | let bucket_name = path_segments[2]; 163 | let params: HashMap = parsed_url 164 | .query() 165 | .map(|v| { 166 | url::form_urlencoded::parse(v.as_bytes()) 167 | .into_owned() 168 | .collect() 169 | }) 170 | .unwrap_or_default(); 171 | params 172 | .get("prefix") 173 | .map(|prefix| format!("s3://{}/{}", bucket_name, prefix)) 174 | .map(|x| (true, x)) 175 | .unwrap_or_else(|| (false, s.to_string())) 176 | } else { 177 | (false, s.to_string()) 178 | } 179 | } else { 180 | (false, s.to_string()) 181 | } 182 | } 183 | 184 | #[test] 185 | fn test_replace_s3_console_url_with_s3_path() -> Result<()> { 186 | assert_eq!( 187 | replace_s3_console_url_with_s3_path("/Users/timvw/test"), 188 | (false, "/Users/timvw/test".to_string()) 189 | ); 190 | assert_eq!(replace_s3_console_url_with_s3_path("https://s3.console.aws.amazon.com/s3/buckets/datafusion-delta-testing?region=eu-central-1&prefix=COVID-19_NYT/&showversions=false"), (true, "s3://datafusion-delta-testing/COVID-19_NYT/".to_string())); 191 | assert_eq!(replace_s3_console_url_with_s3_path("https://s3.console.aws.amazon.com/s3/buckets/datafusion-delta-testing?prefix=COVID-19_NYT/®ion=eu-central-1"), (true, "s3://datafusion-delta-testing/COVID-19_NYT/".to_string())); 192 | Ok(()) 193 | } 194 | 195 | async fn replace_glue_table_with_path( 196 | path: &str, 197 | sdk_config: &SdkConfig, 198 | ) -> Result<(String, Option>)> { 199 | if let Some((database, table)) = parse_glue_url(path) { 200 | let (location, format) = get_path_and_format(sdk_config, &database, &table).await?; 201 | Ok((location, Some(format))) 202 | } else { 203 | Ok((String::from(path), None)) 204 | } 205 | } 206 | 207 | fn parse_glue_url(s: &str) -> Option<(String, String)> { 208 | let re: Regex = Regex::new(r"^glue://(\w+)\.(\w+)$").unwrap(); 209 | re.captures(s).map(|captures| { 210 | let database_name = &captures[1]; 211 | let table_name = &captures[2]; 212 | (database_name.to_string(), table_name.to_string()) 213 | }) 214 | } 215 | 216 | #[test] 217 | fn test_parse_glue_url() { 218 | assert_eq!(None, parse_glue_url("file:///a")); 219 | assert_eq!( 220 | Some(("db".to_string(), "table".to_string())), 221 | parse_glue_url("glue://db.table") 222 | ); 223 | } 224 | 225 | async fn get_path_and_format( 226 | sdk_config: &SdkConfig, 227 | database_name: &str, 228 | table_name: &str, 229 | ) -> Result<(String, Arc)> { 230 | let client: Client = Client::new(sdk_config); 231 | let table = client 232 | .get_table() 233 | .set_database_name(Some(database_name.to_string())) 234 | .set_name(Some(table_name.to_string())) 235 | .send() 236 | .await 237 | .map_err(|e| DataFusionError::External(Box::new(e)))? 
238 | .table 239 | .ok_or_else(|| { 240 | DataFusionError::Execution(format!( 241 | "Could not find {}.{} in glue", 242 | database_name, table_name 243 | )) 244 | })?; 245 | 246 | let sd = table.storage_descriptor().ok_or_else(|| { 247 | DataFusionError::Execution(format!( 248 | "Could not find storage descriptor for {}.{} in glue", 249 | database_name, table_name 250 | )) 251 | })?; 252 | 253 | let location = lookup_storage_location(sd)?; 254 | let format_arc = lookup_file_format(table.clone(), sd)?; 255 | Ok((location, format_arc)) 256 | } 257 | 258 | fn lookup_storage_location(sd: &StorageDescriptor) -> Result { 259 | let location = sd.location().ok_or_else(|| { 260 | DataFusionError::Execution(format!("Could not find sd.location for {sd:#?}",)) 261 | })?; 262 | Ok(location.to_string()) 263 | } 264 | 265 | fn lookup_file_format(table: Table, sd: &StorageDescriptor) -> Result> { 266 | let empty_str = String::from(""); 267 | let input_format = sd.input_format.as_ref().unwrap_or(&empty_str); 268 | let output_format = sd.output_format.as_ref().unwrap_or(&empty_str); 269 | let serde_info = sd.serde_info.as_ref().ok_or_else(|| { 270 | DataFusionError::Execution( 271 | "Failed to find serde_info in storage descriptor for glue table".to_string(), 272 | ) 273 | })?; 274 | let serialization_library = serde_info 275 | .serialization_library 276 | .as_ref() 277 | .unwrap_or(&empty_str); 278 | let serde_info_parameters = serde_info 279 | .parameters 280 | .as_ref() 281 | .ok_or_else(|| { 282 | DataFusionError::Execution( 283 | "Failed to find parameters of serde_info in storage descriptor for glue table" 284 | .to_string(), 285 | ) 286 | })? 287 | .clone(); 288 | let sd_parameters = match &sd.parameters { 289 | Some(x) => x.clone(), 290 | None => HashMap::new(), 291 | }; 292 | 293 | let table_parameters = table.parameters.unwrap_or_default(); 294 | let _table_type = table_parameters 295 | .get("table_type") 296 | .map(|x| x.as_str()) 297 | .unwrap_or_default(); 298 | 299 | // this can be delta... 300 | // or ICEBERG... 301 | 302 | /* 303 | Table format: Apache Iceberg 304 | Input format: - 305 | Output format: - 306 | Serde serialization lib:- 307 | */ 308 | 309 | let item: (&str, &str, &str) = (input_format, output_format, serialization_library); 310 | let format_result: Result> = match item { 311 | ( 312 | "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", 313 | "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat", 314 | "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", 315 | ) => Ok(Arc::new(ParquetFormat::default())), 316 | ( 317 | // actually this is Deltalake format... 318 | "org.apache.hadoop.mapred.SequenceFileInputFormat", 319 | "org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat", 320 | "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", 321 | ) => Ok(Arc::new(ParquetFormat::default())), 322 | ( 323 | "org.apache.hadoop.mapred.TextInputFormat", 324 | "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", 325 | "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", 326 | ) => { 327 | let mut format = CsvFormat::default(); 328 | let delim = serde_info_parameters 329 | .get("field.delim") 330 | .ok_or_else(|| { 331 | DataFusionError::Execution( 332 | "Failed to find field.delim in serde_info parameters".to_string(), 333 | ) 334 | })? 
335 | .as_bytes(); 336 | let delim_char = delim[0]; 337 | format = format.with_delimiter(delim_char); 338 | let has_header = sd_parameters 339 | .get("skip.header.line.count") 340 | .unwrap_or(&empty_str) 341 | .eq("1"); 342 | format = format.with_has_header(has_header); 343 | Ok(Arc::new(format)) 344 | } 345 | ( 346 | "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat", 347 | "org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat", 348 | "org.apache.hadoop.hive.serde2.avro.AvroSerDe", 349 | ) => Ok(Arc::new(AvroFormat)), 350 | ( 351 | "org.apache.hadoop.mapred.TextInputFormat", 352 | "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", 353 | "org.apache.hive.hcatalog.data.JsonSerDe", 354 | ) => Ok(Arc::new(JsonFormat::default())), 355 | ( 356 | "org.apache.hadoop.mapred.TextInputFormat", 357 | "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", 358 | "org.openx.data.jsonserde.JsonSerDe", 359 | ) => Ok(Arc::new(JsonFormat::default())), 360 | ( 361 | "org.apache.hadoop.mapred.TextInputFormat", 362 | "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", 363 | "com.amazon.ionhiveserde.IonHiveSerDe", 364 | ) => Ok(Arc::new(JsonFormat::default())), 365 | _ => Err(DataFusionError::Execution(format!( 366 | "No support for: {}, {}, {:?} yet.", 367 | input_format, output_format, sd 368 | ))), 369 | }; 370 | 371 | let format = format_result?; 372 | Ok(format) 373 | } 374 | 375 | async fn build_s3(url: &Url, sdk_config: &SdkConfig) -> Result { 376 | let cp = sdk_config.credentials_provider().unwrap(); 377 | let creds = cp 378 | .provide_credentials() 379 | .await 380 | .map_err(|e| DataFusionError::Execution(format!("Failed to get credentials: {e}")))?; 381 | 382 | let bucket_name = url.host_str().unwrap(); 383 | 384 | let builder = AmazonS3Builder::from_env() 385 | .with_bucket_name(bucket_name) 386 | .with_access_key_id(creds.access_key_id()) 387 | .with_secret_access_key(creds.secret_access_key()); 388 | 389 | let builder = if let Some(session_token) = creds.session_token() { 390 | builder.with_token(session_token) 391 | } else { 392 | builder 393 | }; 394 | 395 | //https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html 396 | let builder = if let Ok(aws_endpoint_url) = env::var("AWS_ENDPOINT_URL") { 397 | builder.with_endpoint(aws_endpoint_url) 398 | } else { 399 | builder 400 | }; 401 | 402 | let s3 = builder.build()?; 403 | 404 | Ok(s3) 405 | } 406 | 407 | async fn build_gcs(gcs_url: &Url) -> Result { 408 | let google_application_credentials = 409 | env::var("GOOGLE_APPLICATION_CREDENTIALS").map_err(|_| { 410 | DataFusionError::Execution(String::from( 411 | "Could not find GOOGLE_APPLICATION_CREDENTIALS environment variable", 412 | )) 413 | })?; 414 | 415 | let bucket_name = gcs_url.host_str().unwrap(); 416 | 417 | let gcs_builder = GoogleCloudStorageBuilder::new(); 418 | let gcs_builder = gcs_builder.with_bucket_name(bucket_name); 419 | let gcs_builder = gcs_builder.with_service_account_path(google_application_credentials); 420 | let gcs = gcs_builder.build()?; 421 | 422 | Ok(gcs) 423 | } 424 | -------------------------------------------------------------------------------- /tests/files_on_localfs.rs: -------------------------------------------------------------------------------- 1 | use assert_cmd::cargo::CargoError; 2 | use assert_cmd::prelude::*; 3 | use datafusion::common::DataFusionError; 4 | use predicates::prelude::*; 5 | use predicates::str::RegexPredicate; 6 | use std::env; 7 | use std::process::Command; 8 | 9 | fn 
map_cargo_to_datafusion_error(e: CargoError) -> DataFusionError { 10 | DataFusionError::External(Box::new(e)) 11 | } 12 | 13 | fn get_qv_cmd() -> datafusion::common::Result { 14 | Command::cargo_bin(env!("CARGO_PKG_NAME")).map_err(map_cargo_to_datafusion_error) 15 | } 16 | 17 | fn get_qv_testing_path(rel_data_path: &str) -> String { 18 | let testing_path = env::var("QV_TESTING_PATH").unwrap_or_else(|_| "./testing".to_string()); 19 | format!("{}/{}", testing_path, rel_data_path) 20 | } 21 | 22 | fn build_row_regex_predicate(columns: Vec<&str>) -> RegexPredicate { 23 | let pattern = columns.join("\\s*|\\s*"); 24 | predicate::str::is_match(pattern).unwrap() 25 | } 26 | 27 | #[tokio::test] 28 | async fn run_without_file_exits_with_usage() -> datafusion::common::Result<()> { 29 | let mut cmd = get_qv_cmd()?; 30 | cmd.assert() 31 | .failure() 32 | .stderr(predicate::str::contains("Usage: qv ")); 33 | Ok(()) 34 | } 35 | 36 | #[tokio::test] 37 | async fn run_with_local_avro_file() -> datafusion::common::Result<()> { 38 | let mut cmd = get_qv_cmd()?; 39 | let cmd = cmd.arg(get_qv_testing_path("data/avro/alltypes_plain.avro")); 40 | 41 | let header_predicate = build_row_regex_predicate(vec![ 42 | "id", 43 | "bool_col", 44 | "tinyint_col", 45 | "smallint_col", 46 | "int_col", 47 | "bigint_col", 48 | "float_col", 49 | "double_col", 50 | "date_string_col", 51 | "string_col", 52 | "timestamp_col", 53 | ]); 54 | 55 | let data_predicate = build_row_regex_predicate(vec![ 56 | "4", 57 | "true", 58 | "0", 59 | "0", 60 | "0", 61 | "0", 62 | "0.0", 63 | "0.0", 64 | "30332f30312f3039", 65 | "30", 66 | "2009-03-01T00:00:00", 67 | ]); 68 | 69 | cmd.assert() 70 | .success() 71 | .stdout(header_predicate) 72 | .stdout(data_predicate); 73 | Ok(()) 74 | } 75 | 76 | #[tokio::test] 77 | async fn run_with_local_ndjson_file() -> datafusion::common::Result<()> { 78 | let mut cmd = get_qv_cmd()?; 79 | let cmd = cmd 80 | .arg(get_qv_testing_path("data/json/ndjson-sample.json")) 81 | .arg("-q") 82 | .arg("SELECT url from tbl"); 83 | 84 | let header_predicate = build_row_regex_predicate(vec!["url"]); 85 | 86 | let data_predicate = build_row_regex_predicate(vec!["https://www.yelp.com/search"]); 87 | 88 | cmd.assert() 89 | .success() 90 | .stdout(header_predicate) 91 | .stdout(data_predicate); 92 | Ok(()) 93 | } 94 | 95 | #[tokio::test] 96 | async fn run_with_local_ndjson_gz_file() -> datafusion::common::Result<()> { 97 | let mut cmd = get_qv_cmd()?; 98 | let cmd = cmd 99 | .arg(get_qv_testing_path("data/json/ndjson-sample.json.gz")) 100 | .arg("-q") 101 | .arg("SELECT url from tbl"); 102 | 103 | let header_predicate = build_row_regex_predicate(vec!["url"]); 104 | 105 | let data_predicate = build_row_regex_predicate(vec!["https://www.yelp.com/search"]); 106 | 107 | cmd.assert() 108 | .success() 109 | .stdout(header_predicate) 110 | .stdout(data_predicate); 111 | Ok(()) 112 | } 113 | 114 | #[tokio::test] 115 | async fn run_with_local_parquet_file() -> datafusion::common::Result<()> { 116 | let mut cmd = get_qv_cmd()?; 117 | let cmd = cmd.arg(get_qv_testing_path( 118 | "data/parquet/generated_simple_numerics/blogs.parquet", 119 | )); 120 | 121 | let header_predicate = build_row_regex_predicate(vec!["reply", "blog_id"]); 122 | 123 | let data_predicate = build_row_regex_predicate(vec![ 124 | "\\{reply_id: 332770973, next_id: }", 125 | "-1473106667809783919", 126 | ]); 127 | 128 | cmd.assert() 129 | .success() 130 | .stdout(header_predicate) 131 | .stdout(data_predicate); 132 | Ok(()) 133 | } 134 | 135 | #[tokio::test] 136 | async 
fn run_with_local_parquet_files_in_folder() -> datafusion::common::Result<()> { 137 | let mut cmd = get_qv_cmd()?; 138 | let cmd = cmd 139 | .arg(get_qv_testing_path("data/iceberg/db/COVID-19_NYT/data")) 140 | .arg("-q") 141 | .arg("select * from tbl order by date, county, state, fips, cases, deaths"); 142 | 143 | let header_predicate = 144 | build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]); 145 | 146 | let data_predicate = build_row_regex_predicate(vec![ 147 | "2020-01-21", 148 | "Snohomish", 149 | "Washington", 150 | "53061", 151 | "1", 152 | "0", 153 | ]); 154 | 155 | cmd.assert() 156 | .success() 157 | .stdout(header_predicate) 158 | .stdout(data_predicate); 159 | Ok(()) 160 | } 161 | 162 | #[tokio::test] 163 | async fn run_with_local_deltalake() -> datafusion::common::Result<()> { 164 | let mut cmd = get_qv_cmd()?; 165 | let cmd = cmd 166 | .arg(get_qv_testing_path("data/delta/COVID-19_NYT")) 167 | .arg("--at") 168 | .arg("2022-01-13T16:39:00+01:00") 169 | .arg("-q") 170 | .arg("select * from tbl order by date, county, state, fips, cases, deaths"); 171 | 172 | let header_predicate = 173 | build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]); 174 | 175 | let data_predicate = build_row_regex_predicate(vec![ 176 | "2020-01-21", 177 | "Snohomish", 178 | "Washington", 179 | "53061", 180 | "1", 181 | "0", 182 | ]); 183 | 184 | cmd.assert() 185 | .success() 186 | .stdout(header_predicate) 187 | .stdout(data_predicate); 188 | Ok(()) 189 | } 190 | -------------------------------------------------------------------------------- /tests/files_on_s3.rs: -------------------------------------------------------------------------------- 1 | use assert_cmd::cargo::CargoError; 2 | use assert_cmd::prelude::*; 3 | use datafusion::common::DataFusionError; 4 | use predicates::prelude::*; 5 | use predicates::str::RegexPredicate; 6 | use std::env; 7 | use std::process::Command; 8 | 9 | fn configure_minio() { 10 | env::set_var("AWS_REGION", "eu-central-1"); 11 | env::set_var("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE"); 12 | env::set_var( 13 | "AWS_SECRET_ACCESS_KEY", 14 | "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", 15 | ); 16 | env::set_var("AWS_ENDPOINT_URL", "http://localhost:9000"); 17 | env::set_var("AWS_ENDPOINT", "http://localhost:9000"); 18 | env::set_var("AWS_ALLOW_HTTP", "true"); 19 | } 20 | 21 | fn map_cargo_to_datafusion_error(e: CargoError) -> DataFusionError { 22 | DataFusionError::External(Box::new(e)) 23 | } 24 | 25 | fn get_qv_cmd() -> datafusion::common::Result<Command> { 26 | Command::cargo_bin(env!("CARGO_PKG_NAME")).map_err(map_cargo_to_datafusion_error) 27 | } 28 | 29 | fn build_row_regex_predicate(columns: Vec<&str>) -> RegexPredicate { 30 | let pattern = columns.join("\\s*|\\s*"); 31 | predicate::str::is_match(pattern).unwrap() 32 | } 33 | 34 | #[tokio::test] 35 | async fn run_without_file_exits_with_usage() -> datafusion::common::Result<()> { 36 | let mut cmd = get_qv_cmd()?; 37 | cmd.assert() 38 | .failure() 39 | .stderr(predicate::str::contains("Usage: qv ")); 40 | Ok(()) 41 | } 42 | 43 | #[tokio::test] 44 | async fn run_with_s3_parquet_file() -> datafusion::common::Result<()> { 45 | configure_minio(); 46 | 47 | let mut cmd = get_qv_cmd()?; 48 | let cmd = cmd 49 | .arg("s3://data/iceberg/db/COVID-19_NYT/data/00000-2-2d39563f-6901-4e2d-9903-84a8eab8ac3d-00001.parquet") 50 | .arg("-q") 51 | .arg("select * from tbl order by date, county, state, fips, cases, deaths"); 52 | 53 | let header_predicate = 54 |
build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]); 55 | 56 | let data_predicate = build_row_regex_predicate(vec![ 57 | "2020-01-21", 58 | "Snohomish", 59 | "Washington", 60 | "53061", 61 | "1", 62 | "0", 63 | ]); 64 | 65 | cmd.assert() 66 | .success() 67 | .stdout(header_predicate) 68 | .stdout(data_predicate); 69 | Ok(()) 70 | } 71 | 72 | #[tokio::test] 73 | async fn run_with_s3_console_parquet_file() -> datafusion::common::Result<()> { 74 | configure_minio(); 75 | 76 | let mut cmd = get_qv_cmd()?; 77 | let cmd = cmd 78 | .arg("https://s3.console.aws.amazon.com/s3/buckets/data?region=eu-central-1&prefix=iceberg/db/COVID-19_NYT/data/00000-2-2d39563f-6901-4e2d-9903-84a8eab8ac3d-00001.parquet&showversions=false") 79 | .arg("-q") 80 | .arg("select * from tbl order by date, county, state, fips, cases, deaths"); 81 | 82 | let header_predicate = 83 | build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]); 84 | 85 | let data_predicate = build_row_regex_predicate(vec![ 86 | "2020-01-21", 87 | "Snohomish", 88 | "Washington", 89 | "53061", 90 | "1", 91 | "0", 92 | ]); 93 | 94 | cmd.assert() 95 | .success() 96 | .stdout(header_predicate) 97 | .stdout(data_predicate); 98 | Ok(()) 99 | } 100 | 101 | #[tokio::test] 102 | async fn run_with_s3_parquet_files_in_folder_trailing_slash() -> datafusion::common::Result<()> { 103 | configure_minio(); 104 | 105 | let mut cmd = get_qv_cmd()?; 106 | let cmd = cmd 107 | .arg("s3://data/iceberg/db/COVID-19_NYT/data/") 108 | .arg("-q") 109 | .arg("select * from tbl order by date, county, state, fips, cases, deaths"); 110 | 111 | let header_predicate = 112 | build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]); 113 | 114 | let data_predicate = build_row_regex_predicate(vec![ 115 | "2020-01-21", 116 | "Snohomish", 117 | "Washington", 118 | "53061", 119 | "1", 120 | "0", 121 | ]); 122 | 123 | cmd.assert() 124 | .success() 125 | .stdout(header_predicate) 126 | .stdout(data_predicate); 127 | Ok(()) 128 | } 129 | 130 | #[tokio::test] 131 | async fn run_with_s3_parquet_files_in_folder_no_trailing_slash() -> datafusion::common::Result<()> { 132 | configure_minio(); 133 | 134 | let mut cmd = get_qv_cmd()?; 135 | let cmd = cmd 136 | .arg("s3://data/iceberg/db/COVID-19_NYT/data") 137 | .arg("-q") 138 | .arg("select * from tbl order by date, county, state, fips, cases, deaths"); 139 | 140 | let header_predicate = 141 | build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]); 142 | 143 | let data_predicate = build_row_regex_predicate(vec![ 144 | "2020-01-21", 145 | "Snohomish", 146 | "Washington", 147 | "53061", 148 | "1", 149 | "0", 150 | ]); 151 | 152 | cmd.assert() 153 | .success() 154 | .stdout(header_predicate) 155 | .stdout(data_predicate); 156 | Ok(()) 157 | } 158 | 159 | #[tokio::test] 160 | async fn run_with_s3_deltalake() -> datafusion::common::Result<()> { 161 | configure_minio(); 162 | 163 | let mut cmd = get_qv_cmd()?; 164 | let cmd = cmd 165 | .arg("s3://data/delta/COVID-19_NYT") 166 | .arg("--at") 167 | .arg("2022-01-13T16:39:00+01:00") 168 | .arg("-q") 169 | .arg("select * from tbl order by date, county, state, fips, cases, deaths"); 170 | 171 | let header_predicate = 172 | build_row_regex_predicate(vec!["date", "county", "state", "fips", "case", "deaths"]); 173 | 174 | let data_predicate = build_row_regex_predicate(vec![ 175 | "2020-01-21", 176 | "Snohomish", 177 | "Washington", 178 | "53061", 179 | "1", 180 | "0", 181 | ]); 182 | 183 |
cmd.assert() 184 | .success() 185 | .stdout(header_predicate) 186 | .stdout(data_predicate); 187 | Ok(()) 188 | } 189 | --------------------------------------------------------------------------------
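Note on the test helpers above: build_row_regex_predicate joins the expected column values with "\s*|\s*", so the unescaped pipe acts as regex alternation and the predicate already passes when any single value shows up in the output. If stricter whole-row matching were ever wanted (assuming qv prints a pipe-separated table, as the existing patterns suggest), a minimal sketch of a hypothetical helper, reusing the same predicates API as the tests, could escape the separator instead:

use predicates::prelude::*;
use predicates::str::RegexPredicate;

// Hypothetical stricter variant (not part of the repository): the escaped
// pipe is matched literally as the table separator, so every expected value
// must appear in order on a single row, e.g. "2020-01-21\s*\|\s*Snohomish".
fn build_strict_row_regex_predicate(columns: Vec<&str>) -> RegexPredicate {
    let pattern = columns.join("\\s*\\|\\s*");
    predicate::str::is_match(pattern).unwrap()
}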