├── .asf.yaml ├── .cargo └── audit.toml ├── .devcontainer └── devcontainer.json ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── iceberg_bug_report.yml │ ├── iceberg_epic.yml │ └── iceberg_feature.yml ├── PULL_REQUEST_TEMPLATE.md ├── actions │ ├── overwrite-package-version │ │ └── action.yml │ └── setup-builder │ │ └── action.yml ├── dependabot.yml └── workflows │ ├── audit.yml │ ├── bindings_python_ci.yml │ ├── ci.yml │ ├── ci_typos.yml │ ├── publish.yml │ ├── release_python.yml │ ├── release_python_nightly.yml │ └── website.yml ├── .gitignore ├── .idea ├── .gitignore └── vcs.xml ├── .licenserc.yaml ├── .taplo.toml ├── .typos.toml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── bindings └── python │ ├── Cargo.lock │ ├── Cargo.toml │ ├── DEPENDENCIES.rust.tsv │ ├── README.md │ ├── project-description.md │ ├── pyproject.toml │ ├── python │ └── pyiceberg_core │ │ └── __init__.py │ ├── src │ ├── datafusion_table_provider.rs │ ├── error.rs │ ├── lib.rs │ ├── runtime.rs │ └── transform.rs │ └── tests │ ├── test_datafusion_table_provider.py │ └── test_transform.py ├── crates ├── catalog │ ├── glue │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ ├── README.md │ │ ├── src │ │ │ ├── catalog.rs │ │ │ ├── error.rs │ │ │ ├── lib.rs │ │ │ ├── schema.rs │ │ │ └── utils.rs │ │ ├── testdata │ │ │ └── glue_catalog │ │ │ │ └── docker-compose.yaml │ │ └── tests │ │ │ └── glue_catalog_test.rs │ ├── hms │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ ├── README.md │ │ ├── src │ │ │ ├── catalog.rs │ │ │ ├── error.rs │ │ │ ├── lib.rs │ │ │ ├── schema.rs │ │ │ └── utils.rs │ │ ├── testdata │ │ │ └── hms_catalog │ │ │ │ ├── Dockerfile │ │ │ │ ├── core-site.xml │ │ │ │ └── docker-compose.yaml │ │ └── tests │ │ │ └── hms_catalog_test.rs │ ├── memory │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ ├── README.md │ │ └── src │ │ │ ├── catalog.rs │ │ │ ├── lib.rs │ │ │ └── 
namespace_state.rs │ ├── rest │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ ├── README.md │ │ ├── src │ │ │ ├── catalog.rs │ │ │ ├── client.rs │ │ │ ├── lib.rs │ │ │ └── types.rs │ │ ├── testdata │ │ │ ├── create_table_response.json │ │ │ ├── load_table_response.json │ │ │ ├── rest_catalog │ │ │ │ └── docker-compose.yaml │ │ │ └── update_table_response.json │ │ └── tests │ │ │ └── rest_catalog_test.rs │ ├── s3tables │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ └── src │ │ │ ├── catalog.rs │ │ │ ├── lib.rs │ │ │ └── utils.rs │ └── sql │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ └── src │ │ ├── catalog.rs │ │ ├── error.rs │ │ └── lib.rs ├── examples │ ├── Cargo.toml │ ├── DEPENDENCIES.rust.tsv │ ├── README.md │ └── src │ │ ├── oss_backend.rs │ │ ├── rest_catalog_namespace.rs │ │ └── rest_catalog_table.rs ├── iceberg │ ├── Cargo.toml │ ├── DEPENDENCIES.rust.tsv │ ├── README.md │ ├── src │ │ ├── arrow │ │ │ ├── delete_file_manager.rs │ │ │ ├── mod.rs │ │ │ ├── nan_val_cnt_visitor.rs │ │ │ ├── reader.rs │ │ │ ├── record_batch_projector.rs │ │ │ ├── record_batch_transformer.rs │ │ │ ├── schema.rs │ │ │ └── value.rs │ │ ├── avro │ │ │ ├── mod.rs │ │ │ └── schema.rs │ │ ├── cache.rs │ │ ├── catalog │ │ │ └── mod.rs │ │ ├── delete_file_index.rs │ │ ├── delete_vector.rs │ │ ├── error.rs │ │ ├── expr │ │ │ ├── accessor.rs │ │ │ ├── mod.rs │ │ │ ├── predicate.rs │ │ │ ├── term.rs │ │ │ └── visitors │ │ │ │ ├── bound_predicate_visitor.rs │ │ │ │ ├── expression_evaluator.rs │ │ │ │ ├── inclusive_metrics_evaluator.rs │ │ │ │ ├── inclusive_projection.rs │ │ │ │ ├── manifest_evaluator.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── page_index_evaluator.rs │ │ │ │ ├── row_group_metrics_evaluator.rs │ │ │ │ ├── strict_metrics_evaluator.rs │ │ │ │ └── strict_projection.rs │ │ ├── inspect │ │ │ ├── manifests.rs │ │ │ ├── metadata_table.rs │ │ │ ├── mod.rs │ │ │ └── snapshots.rs │ │ ├── io │ │ │ ├── file_io.rs │ │ │ ├── mod.rs │ │ │ ├── object_cache.rs │ │ │ ├── storage.rs │ │ 
│ ├── storage_azdls.rs │ │ │ ├── storage_fs.rs │ │ │ ├── storage_gcs.rs │ │ │ ├── storage_memory.rs │ │ │ ├── storage_oss.rs │ │ │ └── storage_s3.rs │ │ ├── lib.rs │ │ ├── puffin │ │ │ ├── blob.rs │ │ │ ├── compression.rs │ │ │ ├── metadata.rs │ │ │ ├── mod.rs │ │ │ ├── reader.rs │ │ │ ├── test_utils.rs │ │ │ └── writer.rs │ │ ├── runtime │ │ │ └── mod.rs │ │ ├── scan │ │ │ ├── cache.rs │ │ │ ├── context.rs │ │ │ ├── mod.rs │ │ │ └── task.rs │ │ ├── spec │ │ │ ├── datatypes.rs │ │ │ ├── encrypted_key.rs │ │ │ ├── manifest │ │ │ │ ├── _serde.rs │ │ │ │ ├── data_file.rs │ │ │ │ ├── entry.rs │ │ │ │ ├── metadata.rs │ │ │ │ ├── mod.rs │ │ │ │ └── writer.rs │ │ │ ├── manifest_list.rs │ │ │ ├── mod.rs │ │ │ ├── name_mapping │ │ │ │ └── mod.rs │ │ │ ├── partition.rs │ │ │ ├── schema │ │ │ │ ├── _serde.rs │ │ │ │ ├── id_reassigner.rs │ │ │ │ ├── index.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── prune_columns.rs │ │ │ │ ├── utils.rs │ │ │ │ └── visitor.rs │ │ │ ├── snapshot.rs │ │ │ ├── snapshot_summary.rs │ │ │ ├── sort.rs │ │ │ ├── statistic_file.rs │ │ │ ├── table_metadata.rs │ │ │ ├── table_metadata_builder.rs │ │ │ ├── transform.rs │ │ │ ├── values.rs │ │ │ ├── view_metadata.rs │ │ │ ├── view_metadata_builder.rs │ │ │ └── view_version.rs │ │ ├── table.rs │ │ ├── transaction │ │ │ ├── append.rs │ │ │ ├── mod.rs │ │ │ ├── snapshot.rs │ │ │ └── sort_order.rs │ │ ├── transform │ │ │ ├── bucket.rs │ │ │ ├── identity.rs │ │ │ ├── mod.rs │ │ │ ├── temporal.rs │ │ │ ├── truncate.rs │ │ │ └── void.rs │ │ ├── utils.rs │ │ └── writer │ │ │ ├── base_writer │ │ │ ├── data_file_writer.rs │ │ │ ├── equality_delete_writer.rs │ │ │ └── mod.rs │ │ │ ├── file_writer │ │ │ ├── location_generator.rs │ │ │ ├── mod.rs │ │ │ ├── parquet_writer.rs │ │ │ └── track_writer.rs │ │ │ └── mod.rs │ ├── testdata │ │ ├── avro_schema_manifest_entry.json │ │ ├── avro_schema_manifest_file_v1.json │ │ ├── avro_schema_manifest_file_v2.json │ │ ├── example_empty_table_metadata_v2.json │ │ ├── 
example_table_metadata_v2.json │ │ ├── file_io_gcs │ │ │ └── docker-compose.yaml │ │ ├── file_io_s3 │ │ │ └── docker-compose.yaml │ │ ├── manifests_lists │ │ │ ├── manifest-list-v2-1.avro │ │ │ └── manifest-list-v2-2.avro │ │ ├── puffin │ │ │ └── java-generated │ │ │ │ ├── empty-puffin-uncompressed.bin │ │ │ │ ├── sample-metric-data-compressed-zstd.bin │ │ │ │ └── sample-metric-data-uncompressed.bin │ │ ├── table_metadata │ │ │ ├── TableMetadataUnsupportedVersion.json │ │ │ ├── TableMetadataV1Compat.json │ │ │ ├── TableMetadataV1NoValidSchema.json │ │ │ ├── TableMetadataV1PartitionSpecsWithoutDefaultId.json │ │ │ ├── TableMetadataV1SchemasWithoutCurrentId.json │ │ │ ├── TableMetadataV1Valid.json │ │ │ ├── TableMetadataV2CurrentSchemaNotFound.json │ │ │ ├── TableMetadataV2MissingLastPartitionId.json │ │ │ ├── TableMetadataV2MissingPartitionSpecs.json │ │ │ ├── TableMetadataV2MissingSchemas.json │ │ │ ├── TableMetadataV2MissingSortOrder.json │ │ │ ├── TableMetadataV2Valid.json │ │ │ └── TableMetadataV2ValidMinimal.json │ │ └── view_metadata │ │ │ ├── ViewMetadataUnsupportedVersion.json │ │ │ ├── ViewMetadataV1CurrentVersionNotFound.json │ │ │ ├── ViewMetadataV1MissingCurrentVersion.json │ │ │ ├── ViewMetadataV1MissingSchema.json │ │ │ ├── ViewMetadataV1SchemaNotFound.json │ │ │ └── ViewMetadataV1Valid.json │ └── tests │ │ ├── file_io_gcs_test.rs │ │ └── file_io_s3_test.rs ├── integration_tests │ ├── Cargo.toml │ ├── DEPENDENCIES.rust.tsv │ ├── src │ │ └── lib.rs │ ├── testdata │ │ ├── docker-compose.yaml │ │ └── spark │ │ │ ├── Dockerfile │ │ │ ├── entrypoint.sh │ │ │ ├── provision.py │ │ │ └── spark-defaults.conf │ └── tests │ │ ├── shared.rs │ │ └── shared_tests │ │ ├── append_data_file_test.rs │ │ ├── append_partition_data_file_test.rs │ │ ├── conflict_commit_test.rs │ │ ├── datafusion.rs │ │ ├── mod.rs │ │ ├── read_evolved_schema.rs │ │ ├── read_positional_deletes.rs │ │ └── scan_all_type.rs ├── integrations │ ├── cache-moka │ │ ├── Cargo.toml │ │ ├── 
DEPENDENCIES.rust.tsv │ │ ├── README.md │ │ └── src │ │ │ └── lib.rs │ ├── cli │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ ├── README.md │ │ └── src │ │ │ ├── catalog.rs │ │ │ ├── lib.rs │ │ │ └── main.rs │ └── datafusion │ │ ├── Cargo.toml │ │ ├── DEPENDENCIES.rust.tsv │ │ ├── README.md │ │ ├── src │ │ ├── catalog.rs │ │ ├── error.rs │ │ ├── lib.rs │ │ ├── physical_plan │ │ │ ├── expr_to_predicate.rs │ │ │ ├── mod.rs │ │ │ └── scan.rs │ │ ├── schema.rs │ │ └── table │ │ │ ├── mod.rs │ │ │ └── table_provider_factory.rs │ │ ├── testdata │ │ └── table_metadata │ │ │ └── TableMetadataV2.json │ │ └── tests │ │ ├── integration_datafusion_test.rs │ │ └── test_data │ │ └── TableMetadataV2Valid.json ├── sqllogictest │ ├── Cargo.toml │ ├── DEPENDENCIES.rust.tsv │ ├── README.md │ └── src │ │ ├── engine │ │ ├── datafusion.rs │ │ └── mod.rs │ │ ├── error.rs │ │ └── lib.rs └── test_utils │ ├── Cargo.toml │ ├── DEPENDENCIES.rust.tsv │ └── src │ ├── cmd.rs │ ├── docker.rs │ └── lib.rs ├── deny.toml ├── docs └── contributing │ ├── orbstack.md │ └── podman.md ├── rust-toolchain.toml ├── rustfmt.toml └── scripts ├── dependencies.py ├── release.sh └── verify.py /.asf.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | # The format of this file is documented at 21 | # https://cwiki.apache.org/confluence/display/INFRA/Git+-+.asf.yaml+features 22 | 23 | github: 24 | description: "Apache Iceberg" 25 | homepage: https://rust.iceberg.apache.org/ 26 | labels: 27 | - iceberg 28 | - apache 29 | - hacktoberfest 30 | - rust 31 | enabled_merge_buttons: 32 | squash: true 33 | merge: false 34 | rebase: false 35 | protected_branches: 36 | main: 37 | required_status_checks: 38 | # strict means "Require branches to be up to date before merging". 39 | strict: true 40 | 41 | required_pull_request_reviews: 42 | required_approving_review_count: 1 43 | dismiss_stale_reviews: true 44 | 45 | required_linear_history: true 46 | del_branch_on_merge: true 47 | features: 48 | wiki: false 49 | issues: true 50 | projects: true 51 | discussions: true 52 | collaborators: 53 | - JanKaul 54 | - c-thiel 55 | ghp_branch: gh-pages 56 | ghp_path: / 57 | 58 | notifications: 59 | commits: commits@iceberg.apache.org 60 | issues: issues@iceberg.apache.org 61 | pullrequests: issues@iceberg.apache.org 62 | jira_options: link label link label 63 | discussions: issues@iceberg.apache.org 64 | -------------------------------------------------------------------------------- /.cargo/audit.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [advisories] 19 | ignore = [ 20 | # rsa 21 | # Marvin Attack: potential key recovery through timing sidechannels 22 | # Issues: https://github.com/apache/iceberg-rust/issues/221 23 | "RUSTSEC-2023-0071", 24 | # `derivative` is unmaintained; consider using an alternative 25 | # 26 | # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293 27 | "RUSTSEC-2024-0388", 28 | ] 29 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Iceberg Rust", 3 | "image": "mcr.microsoft.com/devcontainers/rust:bullseye", 4 | "customizations": { 5 | "vscode": { 6 | "extensions": [ 7 | "rust-lang.rust-analyzer" 8 | ], 9 | "settings": { 10 | "editor.formatOnSave": true, 11 | "files.exclude": { 12 | "**/LICENSE": true 13 | } 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | website export-ignore 2 | -------------------------------------------------------------------------------- 
/.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | blank_issues_enabled: true 21 | contact_links: 22 | - name: Ask questions about iceberg-rust 23 | url: https://github.com/apache/iceberg-rust/discussions 24 | about: Please ask and answer questions here. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/iceberg_bug_report.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | name: Iceberg Rust Bug Report 🐞 21 | description: Problems, bugs and issues with Apache Iceberg Rust 22 | labels: bug 23 | 24 | body: 25 | - type: dropdown 26 | attributes: 27 | label: Apache Iceberg Rust version 28 | description: What Apache Iceberg Rust version are you using? 29 | multiple: false 30 | options: 31 | - 0.4.0 (latest version) 32 | - 0.3.0 33 | - 0.2.0 34 | validations: 35 | required: false 36 | - type: textarea 37 | attributes: 38 | label: Describe the bug 39 | description: > 40 | Describe the problem, what to expect, and how to reproduce. 41 | You can include files by dragging and dropping them here. 42 | validations: 43 | required: true 44 | - type: textarea 45 | attributes: 46 | label: To Reproduce 47 | placeholder: > 48 | Steps to reproduce the behavior: 49 | - type: textarea 50 | attributes: 51 | label: Expected behavior 52 | placeholder: > 53 | A clear and concise description of what you expected to happen. 54 | - type: dropdown 55 | attributes: 56 | label: Willingness to contribute 57 | description: > 58 | The Apache Iceberg community encourages bug fix contributions. Would you or another member of your organization be willing to contribute a fix for this bug to the Apache Iceberg codebase? 
59 | options: 60 | - I can contribute a fix for this bug independently 61 | - I would be willing to contribute a fix for this bug with guidance from the Iceberg community 62 | - I cannot contribute a fix for this bug at this time 63 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/iceberg_epic.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | name: Iceberg Rust Epic Feature 21 | description: Create an epic issue to act as a parent issue with some sub issues 22 | labels: epic 23 | body: 24 | - type: textarea 25 | attributes: 26 | label: What feature are you trying to implement? 27 | description: Please describe what you are trying to do. 28 | placeholder: > 29 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
30 | (This section helps Iceberg developers understand the context and *why* for this feature, in addition to the *what*) 31 | - type: dropdown 32 | attributes: 33 | label: Willingness to contribute 34 | description: > 35 | The Apache Iceberg community encourages feature contributions. Would you or another member of your organization be willing to contribute to this feature for the Apache Iceberg Rust codebase? 36 | options: 37 | - I can contribute to this feature independently 38 | - I would be willing to contribute to this feature with guidance from the Iceberg Rust community 39 | - I cannot contribute to this feature at this time 40 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/iceberg_feature.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | name: Iceberg Rust Feature Request 21 | description: Suggest an idea for Iceberg Rust 22 | labels: enhancement 23 | body: 24 | - type: textarea 25 | attributes: 26 | label: Is your feature request related to a problem or challenge? 
27 | description: Please describe what you are trying to do. 28 | placeholder: > 29 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 30 | (This section helps Iceberg developers understand the context and *why* for this feature, in addition to the *what*) 31 | - type: textarea 32 | attributes: 33 | label: Describe the solution you'd like 34 | placeholder: > 35 | A clear and concise description of what you want to happen. 36 | - type: dropdown 37 | attributes: 38 | label: Willingness to contribute 39 | description: > 40 | The Apache Iceberg community encourages feature contributions. Would you or another member of your organization be willing to contribute to this feature for the Apache Iceberg Rust codebase? 41 | options: 42 | - I can contribute to this feature independently 43 | - I would be willing to contribute to this feature with guidance from the Iceberg Rust community 44 | - I cannot contribute to this feature at this time 45 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Which issue does this PR close? 2 | 3 | 6 | 7 | - Closes #. 8 | 9 | ## What changes are included in this PR? 10 | 11 | 14 | 15 | ## Are these changes tested? 16 | 17 | -------------------------------------------------------------------------------- /.github/actions/overwrite-package-version/action.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: 'Update Package Version' 19 | description: 'Updates pyproject.toml version with a provided timestamp' 20 | inputs: 21 | timestamp: 22 | description: 'Timestamp to override to the package version' 23 | required: true 24 | runs: 25 | using: "composite" 26 | steps: 27 | - name: Setup Python 28 | uses: actions/setup-python@v5 29 | with: 30 | python-version: '3.12' 31 | 32 | - name: Install toml 33 | run: pip install toml 34 | shell: bash 35 | 36 | - name: Get and update version 37 | shell: bash 38 | env: 39 | TIMESTAMP: ${{ inputs.timestamp }} 40 | run: | 41 | CURRENT_VERSION=$(python -c "import toml; print(toml.load('bindings/python/pyproject.toml')['project']['version'])") 42 | NEW_VERSION="${CURRENT_VERSION}.dev${TIMESTAMP}" 43 | NEW_VERSION=$NEW_VERSION python -c " 44 | import toml 45 | import os 46 | config = toml.load('bindings/python/pyproject.toml') 47 | config['project']['version'] = os.environ['NEW_VERSION'] 48 | with open('bindings/python/pyproject.toml', 'w') as f: 49 | toml.dump(config, f) 50 | print(f'Updated version to: {config[\"project\"][\"version\"]}') 51 | " -------------------------------------------------------------------------------- /.github/actions/setup-builder/action.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # This file is heavily inspired by 19 | # [datafusion](https://github.com/apache/datafusion/blob/main/.github/actions/setup-builder/action.yaml). 20 | name: Prepare Rust Builder 21 | description: 'Prepare Rust Build Environment' 22 | inputs: 23 | rust-version: 24 | description: 'version of rust to install and use' 25 | runs: 26 | using: "composite" 27 | steps: 28 | - name: Setup specified Rust toolchain 29 | shell: bash 30 | if: ${{ inputs.rust-version != '' }} 31 | env: 32 | RUST_VERSION: ${{ inputs.rust-version }} 33 | run: | 34 | echo "Installing ${RUST_VERSION}" 35 | rustup toolchain install ${RUST_VERSION} 36 | rustup override set ${RUST_VERSION} 37 | rustup component add rustfmt clippy 38 | - name: Setup Rust toolchain according to rust-toolchain.toml 39 | shell: bash 40 | if: ${{ inputs.rust-version == '' }} 41 | run: | 42 | echo "Installing toolchain according to rust-toolchain.toml" 43 | rustup show 44 | rustup component add rustfmt clippy 45 | - name: Fixup git permissions 46 | # https://github.com/actions/checkout/issues/766 47 | shell: bash 48 | run: git config --global --add safe.directory "$GITHUB_WORKSPACE" -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software 
Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | version: 2 19 | updates: 20 | # Maintain dependencies for GitHub Actions 21 | - package-ecosystem: "github-actions" 22 | directory: "/" 23 | schedule: 24 | interval: "weekly" 25 | day: "sunday" 26 | 27 | # Maintain dependencies for iceberg 28 | - package-ecosystem: "cargo" 29 | directory: "/" 30 | schedule: 31 | interval: "weekly" 32 | day: "sunday" 33 | open-pull-requests-limit: 50 34 | versioning-strategy: lockfile-only 35 | ignore: 36 | # For all packages, ignore all patch updates 37 | - dependency-name: "*" 38 | update-types: ["version-update:semver-patch"] 39 | groups: 40 | arrow-parquet: 41 | applies-to: version-updates 42 | patterns: 43 | - "arrow*" 44 | - "parquet" 45 | -------------------------------------------------------------------------------- /.github/workflows/audit.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Security audit 19 | 20 | concurrency: 21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 22 | cancel-in-progress: true 23 | 24 | on: 25 | push: 26 | paths: 27 | - "**/Cargo.toml" 28 | - "**/Cargo.lock" 29 | 30 | pull_request: 31 | paths: 32 | - "**/Cargo.toml" 33 | - "**/Cargo.lock" 34 | - ".github/workflows/audit.yml" 35 | 36 | schedule: 37 | - cron: '0 0 * * *' 38 | 39 | jobs: 40 | security_audit: 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v4 44 | - name: Setup Rust toolchain 45 | uses: ./.github/actions/setup-builder 46 | with: 47 | rust-version: stable 48 | - uses: rustsec/audit-check@v2.0.0 49 | with: 50 | token: ${{ secrets.GITHUB_TOKEN }} 51 | ignore: RUSTSEC-2024-0436 52 | -------------------------------------------------------------------------------- /.github/workflows/bindings_python_ci.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Bindings Python CI 19 | 20 | on: 21 | push: 22 | branches: 23 | - main 24 | pull_request: 25 | branches: 26 | - main 27 | 28 | concurrency: 29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} 30 | cancel-in-progress: true 31 | 32 | jobs: 33 | check-rust: 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v4 37 | - name: Check format 38 | working-directory: "bindings/python" 39 | run: cargo fmt --all -- --check 40 | - name: Check clippy 41 | working-directory: "bindings/python" 42 | run: cargo clippy --all-targets --all-features -- -D warnings 43 | 44 | check-python: 45 | runs-on: ubuntu-latest 46 | steps: 47 | - uses: actions/checkout@v4 48 | - name: Install tools 49 | run: | 50 | pip install ruff 51 | - name: Check format 52 | working-directory: "bindings/python" 53 | run: | 54 | ruff format . --diff 55 | - name: Check style 56 | working-directory: "bindings/python" 57 | run: | 58 | ruff check . 
59 | 60 | test: 61 | runs-on: ${{ matrix.os }} 62 | strategy: 63 | matrix: 64 | os: 65 | - ubuntu-latest 66 | - macos-latest 67 | - windows-latest 68 | steps: 69 | - uses: actions/checkout@v4 70 | - uses: actions/setup-python@v5 71 | with: 72 | python-version: 3.9 73 | - uses: PyO3/maturin-action@v1 74 | with: 75 | working-directory: "bindings/python" 76 | command: build 77 | args: --out dist --sdist 78 | - name: Run tests 79 | working-directory: "bindings/python" 80 | shell: bash 81 | run: | 82 | set -e 83 | pip install hatch==1.12.0 84 | hatch run dev:pip install dist/pyiceberg_core-*.whl --force-reinstall 85 | hatch run dev:test 86 | -------------------------------------------------------------------------------- /.github/workflows/ci_typos.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | name: Typos Check 19 | 20 | on: 21 | push: 22 | branches: 23 | - main 24 | pull_request: 25 | branches: 26 | - main 27 | 28 | concurrency: 29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} 30 | cancel-in-progress: true 31 | 32 | env: 33 | RUST_BACKTRACE: 1 34 | 35 | jobs: 36 | typos-check: 37 | name: typos check 38 | runs-on: ubuntu-latest 39 | timeout-minutes: 10 40 | env: 41 | FORCE_COLOR: 1 42 | steps: 43 | - uses: actions/checkout@v4 44 | - name: Check typos 45 | uses: crate-ci/typos@v1.32.0 46 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | name: Publish 19 | 20 | on: 21 | push: 22 | tags: 23 | - "*" 24 | workflow_dispatch: 25 | 26 | env: 27 | rust_msrv: "1.85" 28 | 29 | jobs: 30 | publish: 31 | runs-on: ubuntu-latest 32 | strategy: 33 | # Publish package one by one instead of flooding the registry 34 | max-parallel: 1 35 | matrix: 36 | # Order here is sensitive, as it will be used to determine the order of publishing 37 | package: 38 | - "crates/iceberg" 39 | - "crates/catalog/glue" 40 | - "crates/catalog/hms" 41 | - "crates/catalog/memory" 42 | - "crates/catalog/rest" 43 | # sql is not ready for release yet. 44 | # - "crates/catalog/sql" 45 | - "crates/integrations/datafusion" 46 | steps: 47 | - uses: actions/checkout@v4 48 | 49 | - name: Setup Rust toolchain 50 | uses: ./.github/actions/setup-builder 51 | with: 52 | rust-version: ${{ env.rust_msrv }} 53 | 54 | - name: Publish ${{ matrix.package }} 55 | working-directory: ${{ matrix.package }} 56 | # Only publish if it's a tag and the tag is not a pre-release 57 | if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} 58 | run: cargo publish --all-features 59 | env: 60 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 61 | -------------------------------------------------------------------------------- /.github/workflows/website.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Website 19 | 20 | on: 21 | push: 22 | branches: 23 | - main 24 | pull_request: 25 | branches: 26 | - main 27 | 28 | concurrency: 29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} 30 | cancel-in-progress: true 31 | 32 | jobs: 33 | build: 34 | runs-on: ubuntu-latest 35 | permissions: 36 | contents: write 37 | steps: 38 | - uses: actions/checkout@v4 39 | 40 | - name: Setup mdBook 41 | uses: peaceiris/actions-mdbook@v2 42 | with: 43 | mdbook-version: '0.4.36' 44 | 45 | - name: Build 46 | working-directory: website 47 | run: mdbook build 48 | 49 | - name: Copy asf file 50 | run: cp .asf.yaml ./website/book/.asf.yaml 51 | 52 | - name: Build API docs 53 | run: | 54 | cargo doc --no-deps --workspace --all-features 55 | cp -r target/doc ./website/book/api 56 | 57 | - name: Deploy to gh-pages 58 | uses: peaceiris/actions-gh-pages@v4.0.0 59 | if: github.event_name == 'push' && github.ref_name == 'main' 60 | with: 61 | github_token: ${{ secrets.GITHUB_TOKEN }} 62 | publish_dir: website/book 63 | publish_branch: gh-pages 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | target 19 | .idea 20 | .vscode 21 | .zed 22 | **/.DS_Store 23 | dist/* 24 | **/venv 25 | *.so 26 | *.pyc 27 | *.whl 28 | *.tar.gz 29 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | 19 | !.gitignore 20 | !vcs.xml 21 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 16 | 17 | 18 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | header: 19 | license: 20 | spdx-id: Apache-2.0 21 | copyright-owner: Apache Software Foundation 22 | 23 | paths-ignore: 24 | - 'LICENSE' 25 | - 'NOTICE' 26 | - '.gitattributes' 27 | - '**/*.json' 28 | # Generated content by mdbook 29 | - 'website/book' 30 | # Generated content by scripts 31 | - '**/DEPENDENCIES.*.tsv' 32 | # Release distributions 33 | - 'dist/*' 34 | - 'Cargo.lock' 35 | - '.github/PULL_REQUEST_TEMPLATE.md' 36 | comment: on-failure 37 | -------------------------------------------------------------------------------- /.taplo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | include = ["Cargo.toml", "**/*.toml"] 19 | 20 | [formatting] 21 | # Align consecutive entries vertically. 22 | align_entries = false 23 | # Append trailing commas for multi-line arrays. 24 | array_trailing_comma = true 25 | # Expand arrays to multiple lines that exceed the maximum column width. 26 | array_auto_expand = true 27 | # Collapse arrays that don't exceed the maximum column width and don't contain comments. 
28 | array_auto_collapse = true 29 | # Omit white space padding from single-line arrays 30 | compact_arrays = true 31 | # Omit white space padding from the start and end of inline tables. 32 | compact_inline_tables = false 33 | # Maximum column width in characters, affects array expansion and collapse, this doesn't take whitespace into account. 34 | # Note that this is not set in stone, and works on a best-effort basis. 35 | column_width = 80 36 | # Indent based on tables and arrays of tables and their subtables, subtables out of order are not indented. 37 | indent_tables = false 38 | # The substring that is used for indentation, should be tabs or spaces (but technically can be anything). 39 | indent_string = ' ' 40 | # Add trailing newline at the end of the file if not present. 41 | trailing_newline = true 42 | # Alphabetically reorder keys that are not separated by empty lines. 43 | reorder_keys = true 44 | # Maximum amount of allowed consecutive blank lines. This does not affect the whitespace at the end of the document, as it is always stripped. 45 | allowed_blank_lines = 1 46 | # Use CRLF for line endings. 47 | crlf = false 48 | -------------------------------------------------------------------------------- /.typos.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [files] 19 | extend-exclude = ["**/testdata", "CHANGELOG.md"] 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | .EXPORT_ALL_VARIABLES: 19 | 20 | build: 21 | cargo build --all-targets --all-features --workspace 22 | 23 | check-fmt: 24 | cargo fmt --all -- --check 25 | 26 | check-clippy: 27 | cargo clippy --all-targets --all-features --workspace -- -D warnings 28 | 29 | install-cargo-machete: 30 | cargo install cargo-machete@0.7.0 31 | 32 | cargo-machete: install-cargo-machete 33 | cargo machete 34 | 35 | install-taplo-cli: 36 | cargo install taplo-cli@0.9.3 37 | 38 | fix-toml: install-taplo-cli 39 | taplo fmt 40 | 41 | check-toml: install-taplo-cli 42 | taplo check 43 | 44 | check: check-fmt check-clippy check-toml cargo-machete 45 | 46 | doc-test: 47 | cargo test --no-fail-fast --doc --all-features --workspace 48 | 49 | unit-test: doc-test 50 | cargo test --no-fail-fast --lib --all-features --workspace 51 | 52 | test: doc-test 53 | cargo test --no-fail-fast --all-targets --all-features --workspace 54 | 55 | clean: 56 | cargo clean 57 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache Iceberg Rust 2 | Copyright 2023-2024 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | -------------------------------------------------------------------------------- /bindings/python/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | edition = "2024" 20 | homepage = "https://rust.iceberg.apache.org" 21 | name = "pyiceberg_core_rust" 22 | rust-version = "1.85" 23 | version = "0.5.1" 24 | # This crate is used to build python bindings, we don't want to publish it 25 | publish = false 26 | 27 | keywords = ["iceberg"] 28 | license = "Apache-2.0" 29 | 30 | [lib] 31 | crate-type = ["cdylib"] 32 | 33 | [dependencies] 34 | arrow = { version = "55", features = ["pyarrow", "chrono-tz"] } 35 | iceberg = { path = "../../crates/iceberg" } 36 | pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] } 37 | iceberg-datafusion = { path = "../../crates/integrations/datafusion" } 38 | datafusion-ffi = { version = "47" } 39 | tokio = { version = "1.44", default-features = false } 40 | -------------------------------------------------------------------------------- /bindings/python/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Pyiceberg Core 21 | 22 | This project is used to build an iceberg-rust powered core for pyiceberg. 
23 | 24 | ## Setup 25 | 26 | ```shell 27 | pip install hatch==1.12.0 28 | ``` 29 | 30 | ## Build 31 | 32 | ```shell 33 | hatch run dev:develop 34 | ``` 35 | 36 | ## Test 37 | 38 | ```shell 39 | hatch run dev:test 40 | ``` -------------------------------------------------------------------------------- /bindings/python/project-description.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Pyiceberg Core 21 | 22 | This project is used to build an iceberg-rust powered core for pyiceberg, and intended for use only by pyiceberg. 23 | 24 | Install via PyPI: 25 | 26 | ``` 27 | pip install pyiceberg-core 28 | ``` 29 | -------------------------------------------------------------------------------- /bindings/python/pyproject.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [build-system] 19 | build-backend = "maturin" 20 | requires = ["maturin>=1.0,<2.0"] 21 | 22 | [project] 23 | classifiers = [ 24 | "Development Status :: 4 - Beta", 25 | "Intended Audience :: Developers", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Operating System :: OS Independent", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3.11", 31 | "Programming Language :: Python :: 3.12", 32 | ] 33 | name = "pyiceberg-core" 34 | readme = "project-description.md" 35 | requires-python = "~=3.9" 36 | dynamic = ["version"] 37 | 38 | [tool.maturin] 39 | features = ["pyo3/extension-module"] 40 | module-name = "pyiceberg_core.pyiceberg_core_rust" 41 | python-source = "python" 42 | 43 | [tool.ruff.lint] 44 | ignore = ["F403", "F405"] 45 | 46 | [tool.hatch.envs.dev] 47 | dependencies = ["maturin>=1.0,<2.0", "pytest>=8.3.2", "pyarrow>=17.0.0", "datafusion>=45", "pyiceberg[sql-sqlite]>=0.9.1"] 48 | 49 | [tool.hatch.envs.dev.scripts] 50 | build = "maturin build --out dist --sdist" 51 | develop = "maturin develop" 52 | test = "pytest" 53 | -------------------------------------------------------------------------------- /bindings/python/python/pyiceberg_core/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from .pyiceberg_core_rust import * 19 | 20 | __doc__ = pyiceberg_core_rust.__doc__ 21 | __all__ = pyiceberg_core_rust.__all__ 22 | -------------------------------------------------------------------------------- /bindings/python/src/error.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use pyo3::PyErr; 19 | use pyo3::exceptions::PyValueError; 20 | 21 | /// Convert an iceberg error into a Python `ValueError` whose message is the error's `Display` text 22 | pub fn to_py_err(err: iceberg::Error) -> PyErr { 23 | PyValueError::new_err(err.to_string()) 24 | } 25 | -------------------------------------------------------------------------------- /bindings/python/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use pyo3::prelude::*; 19 | 20 | mod datafusion_table_provider; 21 | mod error; 22 | mod runtime; 23 | mod transform; 24 | // Extension-module entry point: delegates to each submodule's `register_module`, stopping at the first error. 25 | #[pymodule] 26 | fn pyiceberg_core_rust(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { 27 | datafusion_table_provider::register_module(py, m)?; 28 | transform::register_module(py, m)?; 29 | Ok(()) 30 | } 31 | -------------------------------------------------------------------------------- /bindings/python/src/runtime.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::sync::OnceLock; 19 | 20 | use tokio::runtime::{Handle, Runtime}; 21 | // Fallback Tokio runtime, built at most once, the first time `runtime()` finds no current runtime. 22 | static RUNTIME: OnceLock<Runtime> = OnceLock::new(); 23 | /// Return a handle to the current Tokio runtime, or to the lazily created fallback `RUNTIME` when `Handle::try_current()` fails. Panics if the fallback runtime cannot be built. 24 | pub fn runtime() -> Handle { 25 | match Handle::try_current() { 26 | Ok(h) => h, 27 | _ => { 28 | let rt = RUNTIME.get_or_init(|| Runtime::new().unwrap()); 29 | rt.handle().clone() 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /crates/catalog/glue/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg-catalog-glue" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | categories = ["database"] 26 | description = "Apache Iceberg Glue Catalog Support" 27 | keywords = ["iceberg", "glue", "catalog"] 28 | license = { workspace = true } 29 | repository = { workspace = true } 30 | 31 | [dependencies] 32 | anyhow = { workspace = true } 33 | async-trait = { workspace = true } 34 | aws-config = { workspace = true } 35 | aws-sdk-glue = { workspace = true } 36 | iceberg = { workspace = true } 37 | serde_json = { workspace = true } 38 | tokio = { workspace = true } 39 | tracing = { workspace = true } 40 | typed-builder = { workspace = true } 41 | uuid = { workspace = true } 42 | 43 | [dev-dependencies] 44 | ctor = { workspace = true } 45 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } 46 | port_scanner = { workspace = true } 47 | -------------------------------------------------------------------------------- /crates/catalog/glue/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Apache Iceberg Glue Catalog Official Native Rust Implementation 21 | 22 | [![crates.io](https://img.shields.io/crates/v/iceberg-catalog-glue.svg)](https://crates.io/crates/iceberg-catalog-glue) 23 | [![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-glue.svg)](https://docs.rs/iceberg-catalog-glue/latest/iceberg_catalog_glue/) 24 | 25 | This crate contains the 
official Native Rust implementation of Apache Iceberg Glue Catalog. 26 | 27 | See the [API documentation](https://docs.rs/iceberg-catalog-glue/latest) for examples and the full API. 28 | -------------------------------------------------------------------------------- /crates/catalog/glue/src/error.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::fmt::Debug; 19 | 20 | use anyhow::anyhow; 21 | use iceberg::{Error, ErrorKind}; 22 | 23 | /// Format AWS SDK error into iceberg error 24 | pub(crate) fn from_aws_sdk_error(error: aws_sdk_glue::error::SdkError) -> Error 25 | where T: Debug { 26 | Error::new( 27 | ErrorKind::Unexpected, 28 | "Operation failed for hitting aws sdk error".to_string(), 29 | ) 30 | .with_source(anyhow!("aws sdk error: {:?}", error)) 31 | } 32 | 33 | /// Format AWS Build error into iceberg error 34 | pub(crate) fn from_aws_build_error(error: aws_sdk_glue::error::BuildError) -> Error { 35 | Error::new( 36 | ErrorKind::Unexpected, 37 | "Operation failed for hitting aws build error".to_string(), 38 | ) 39 | .with_source(anyhow!("aws build error: {:?}", error)) 40 | } 41 | -------------------------------------------------------------------------------- /crates/catalog/glue/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Iceberg Glue Catalog implementation. 
19 | 20 | #![deny(missing_docs)] 21 | 22 | mod catalog; 23 | mod error; 24 | mod schema; 25 | mod utils; 26 | pub use catalog::*; 27 | pub use utils::{ 28 | AWS_ACCESS_KEY_ID, AWS_PROFILE_NAME, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, 29 | }; 30 | -------------------------------------------------------------------------------- /crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | services: 19 | minio: 20 | image: minio/minio:RELEASE.2025-05-24T17-08-30Z 21 | expose: 22 | - 9000 23 | - 9001 24 | environment: 25 | - MINIO_ROOT_USER=admin 26 | - MINIO_ROOT_PASSWORD=password 27 | - MINIO_DOMAIN=minio 28 | command: [ "server", "/data", "--console-address", ":9001" ] 29 | 30 | mc: 31 | depends_on: 32 | - minio 33 | image: minio/mc:RELEASE.2025-05-21T01-59-54Z 34 | environment: 35 | - AWS_ACCESS_KEY_ID=admin 36 | - AWS_SECRET_ACCESS_KEY=password 37 | - AWS_REGION=us-east-1 38 | entrypoint: > 39 | /bin/sh -c " until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; tail -f /dev/null " 40 | 41 | moto: 42 | image: motoserver/moto:5.0.3 43 | expose: 44 | - 5000 45 | -------------------------------------------------------------------------------- /crates/catalog/hms/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg-catalog-hms" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | categories = ["database"] 26 | description = "Apache Iceberg Hive Metastore Catalog Support" 27 | keywords = ["iceberg", "hive", "catalog"] 28 | license = { workspace = true } 29 | repository = { workspace = true } 30 | 31 | [dependencies] 32 | anyhow = { workspace = true } 33 | async-trait = { workspace = true } 34 | chrono = { workspace = true } 35 | hive_metastore = { workspace = true } 36 | iceberg = { workspace = true } 37 | pilota = { workspace = true } 38 | serde_json = { workspace = true } 39 | tokio = { workspace = true } 40 | tracing = { workspace = true } 41 | typed-builder = { workspace = true } 42 | uuid = { workspace = true } 43 | volo-thrift = { workspace = true } 44 | 45 | # Transitive dependencies below 46 | 47 | # Some dependencies don't correctly specify a minimal version for their dependencies and will fail to build in minimal versions build. 48 | # So we specify the version of these transitive dependencies here. 49 | # They can be removed when the direct dependencies are updated. 
50 | 51 | # transitive dependencies of pilota/volo-thrift 52 | faststr = { workspace = true } 53 | linkedbytes = { workspace = true } 54 | metainfo = { workspace = true } 55 | motore-macros = { workspace = true } 56 | volo = { workspace = true } 57 | 58 | [dev-dependencies] 59 | ctor = { workspace = true } 60 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } 61 | port_scanner = { workspace = true } 62 | 63 | [package.metadata.cargo-machete] 64 | # These dependencies are added to ensure minimal dependency version 65 | ignored = ["faststr", "linkedbytes", "metainfo", "volo", "motore-macros"] 66 | -------------------------------------------------------------------------------- /crates/catalog/hms/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Apache Iceberg HiveMetaStore Catalog Official Native Rust Implementation 21 | 22 | [![crates.io](https://img.shields.io/crates/v/iceberg-catalog-hms.svg)](https://crates.io/crates/iceberg-catalog-hms) 23 | [![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-hms.svg)](https://docs.rs/iceberg-catalog-hms/latest) 24 | 25 | This crate contains the official Native Rust implementation of Apache Iceberg HiveMetaStore Catalog. 26 | 27 | See the [API documentation](https://docs.rs/iceberg-catalog-hms/latest) for examples and the full API. 28 | -------------------------------------------------------------------------------- /crates/catalog/hms/src/error.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License.
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::fmt::Debug; 19 | use std::io; 20 | 21 | use anyhow::anyhow; 22 | use iceberg::{Error, ErrorKind}; 23 | use volo_thrift::MaybeException; 24 | 25 | /// Format a thrift error into iceberg error. 26 | /// 27 | /// Please only throw this error when you are sure that the error is caused by thrift. 28 | pub fn from_thrift_error(error: impl std::error::Error) -> Error { 29 | Error::new( 30 | ErrorKind::Unexpected, 31 | "Operation failed for hitting thrift error".to_string(), 32 | ) 33 | .with_source(anyhow!("thrift error: {:?}", error)) 34 | } 35 | 36 | /// Format a thrift exception into iceberg error. 37 | pub fn from_thrift_exception<T, E: Debug>(value: MaybeException<T, E>) -> Result<T, Error> { 38 | match value { 39 | MaybeException::Ok(v) => Ok(v), 40 | MaybeException::Exception(err) => Err(Error::new( 41 | ErrorKind::Unexpected, 42 | "Operation failed for hitting thrift error".to_string(), 43 | ) 44 | .with_source(anyhow!("thrift error: {:?}", err))), 45 | } 46 | } 47 | 48 | /// Format an io error into iceberg error. 49 | pub fn from_io_error(error: io::Error) -> Error { 50 | Error::new( 51 | ErrorKind::Unexpected, 52 | "Operation failed for hitting io error".to_string(), 53 | ) 54 | .with_source(error) 55 | } 56 | -------------------------------------------------------------------------------- /crates/catalog/hms/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements.
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Iceberg Hive Metastore Catalog implementation. 19 | 20 | #![deny(missing_docs)] 21 | 22 | mod catalog; 23 | pub use catalog::*; 24 | 25 | mod error; 26 | mod schema; 27 | mod utils; 28 | -------------------------------------------------------------------------------- /crates/catalog/hms/testdata/hms_catalog/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | FROM --platform=$BUILDPLATFORM openjdk:8-jre-slim AS build 17 | 18 | ARG BUILDPLATFORM 19 | 20 | RUN apt-get update -qq && apt-get -qq -y install curl 21 | 22 | ENV AWSSDK_VERSION=2.20.18 23 | ENV HADOOP_VERSION=3.1.0 24 | 25 | RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.271/aws-java-sdk-bundle-1.11.271.jar -Lo /tmp/aws-java-sdk-bundle-1.11.271.jar 26 | RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /tmp/hadoop-aws-${HADOOP_VERSION}.jar 27 | 28 | 29 | FROM apache/hive:3.1.3 30 | 31 | ENV AWSSDK_VERSION=2.20.18 32 | ENV HADOOP_VERSION=3.1.0 33 | 34 | COPY --from=build /tmp/hadoop-aws-${HADOOP_VERSION}.jar /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar 35 | COPY --from=build /tmp/aws-java-sdk-bundle-1.11.271.jar /opt/hive/lib/aws-java-sdk-bundle-1.11.271.jar 36 | COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml -------------------------------------------------------------------------------- /crates/catalog/hms/testdata/hms_catalog/core-site.xml: -------------------------------------------------------------------------------- 1 | 17 | 18 | <configuration> 19 | <property> 20 | <name>fs.defaultFS</name> 21 | <value>s3a://warehouse/hive</value> 22 | </property> 23 | <property> 24 | <name>fs.s3a.impl</name> 25 | <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value> 26 | </property> 27 | <property> 28 | <name>fs.s3a.fast.upload</name> 29 | <value>true</value> 30 | </property> 31 | <property> 32 | <name>fs.s3a.endpoint</name> 33 | <value>http://minio:9000</value> 34 | </property> 35 | <property> 36 | <name>fs.s3a.access.key</name> 37 | <value>admin</value> 38 | </property> 39 | <property> 40 | <name>fs.s3a.secret.key</name> 41 | <value>password</value> 42 | </property> 43 | <property> 44 | <name>fs.s3a.connection.ssl.enabled</name> 45 | <value>false</value> 46 | </property> 47 | <property> 48 | <name>fs.s3a.path.style.access</name> 49 | <value>true</value> 50 | </property> 51 | </configuration> -------------------------------------------------------------------------------- /crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF)
under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | services: 19 | minio: 20 | image: minio/minio:RELEASE.2025-05-24T17-08-30Z 21 | expose: 22 | - 9000 23 | - 9001 24 | environment: 25 | - MINIO_ROOT_USER=admin 26 | - MINIO_ROOT_PASSWORD=password 27 | - MINIO_DOMAIN=minio 28 | command: [ "server", "/data", "--console-address", ":9001" ] 29 | 30 | mc: 31 | depends_on: 32 | - minio 33 | image: minio/mc:RELEASE.2025-05-21T01-59-54Z 34 | environment: 35 | - AWS_ACCESS_KEY_ID=admin 36 | - AWS_SECRET_ACCESS_KEY=password 37 | - AWS_REGION=us-east-1 38 | entrypoint: > 39 | /bin/sh -c " until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; tail -f /dev/null " 40 | 41 | hive-metastore: 42 | image: iceberg-hive-metastore 43 | build: ./ 44 | platform: ${DOCKER_DEFAULT_PLATFORM} 45 | expose: 46 | - 9083 47 | environment: 48 | SERVICE_NAME: "metastore" 49 | SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/" 50 | -------------------------------------------------------------------------------- /crates/catalog/memory/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg-catalog-memory" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | categories = ["database"] 26 | description = "Apache Iceberg Rust Memory Catalog API" 27 | keywords = ["iceberg", "memory", "catalog"] 28 | license = { workspace = true } 29 | repository = { workspace = true } 30 | 31 | [dependencies] 32 | async-trait = { workspace = true } 33 | futures = { workspace = true } 34 | iceberg = { workspace = true } 35 | itertools = { workspace = true } 36 | serde_json = { workspace = true } 37 | uuid = { workspace = true, features = ["v4"] } 38 | 39 | [dev-dependencies] 40 | regex = { workspace = true } 41 | tempfile = { workspace = true } 42 | tokio = { workspace = true } 43 | -------------------------------------------------------------------------------- /crates/catalog/memory/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Apache Iceberg Memory Catalog Official Native Rust Implementation 21 | 22 | [![crates.io](https://img.shields.io/crates/v/iceberg-catalog-memory.svg)](https://crates.io/crates/iceberg-catalog-memory) 23 | [![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-memory.svg)](https://docs.rs/iceberg-catalog-memory/latest) 24 | 25 | This crate contains the official Native Rust implementation of Apache Iceberg Memory Catalog. 26 | 27 | See the [API documentation](https://docs.rs/iceberg-catalog-memory/latest) for examples and the full API. 28 | -------------------------------------------------------------------------------- /crates/catalog/memory/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements.
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Iceberg memory Catalog API implementation. 19 | 20 | #![deny(missing_docs)] 21 | 22 | mod catalog; 23 | mod namespace_state; 24 | 25 | pub use catalog::*; 26 | -------------------------------------------------------------------------------- /crates/catalog/rest/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg-catalog-rest" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | categories = ["database"] 26 | description = "Apache Iceberg Rust REST API" 27 | keywords = ["iceberg", "rest", "catalog"] 28 | license = { workspace = true } 29 | repository = { workspace = true } 30 | 31 | [dependencies] 32 | async-trait = { workspace = true } 33 | chrono = { workspace = true } 34 | http = { workspace = true } 35 | iceberg = { workspace = true } 36 | itertools = { workspace = true } 37 | reqwest = { workspace = true } 38 | serde = { workspace = true } 39 | serde_derive = { workspace = true } 40 | serde_json = { workspace = true } 41 | tokio = { workspace = true, features = ["sync"] } 42 | tracing = { workspace = true } 43 | typed-builder = { workspace = true } 44 | uuid = { workspace = true, features = ["v4"] } 45 | 46 | [dev-dependencies] 47 | ctor = { workspace = true } 48 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } 49 | mockito = { workspace = true } 50 | port_scanner = { workspace = true } 51 | tokio = { workspace = true } 52 | -------------------------------------------------------------------------------- /crates/catalog/rest/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Apache Iceberg Rest Catalog Official Native Rust Implementation 21 | 22 | [![crates.io](https://img.shields.io/crates/v/iceberg-catalog-rest.svg)](https://crates.io/crates/iceberg-catalog-rest) 23 | [![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-rest.svg)](https://docs.rs/iceberg-catalog-rest/latest) 24 | 25 | This crate contains the official Native Rust implementation of Apache Iceberg Rest Catalog. 26 | 27 | See the [API documentation](https://docs.rs/iceberg-catalog-rest/latest) for examples and the full API.
28 | -------------------------------------------------------------------------------- /crates/catalog/rest/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Iceberg REST API implementation. 
19 | 20 | #![deny(missing_docs)] 21 | 22 | mod catalog; 23 | mod client; 24 | mod types; 25 | 26 | pub use catalog::*; 27 | -------------------------------------------------------------------------------- /crates/catalog/rest/testdata/create_table_response.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata-location": "s3://warehouse/database/table/metadata.json", 3 | "metadata": { 4 | "format-version": 1, 5 | "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29", 6 | "location": "s3://warehouse/database/table", 7 | "last-updated-ms": 1657810967051, 8 | "last-column-id": 3, 9 | "schema": { 10 | "type": "struct", 11 | "schema-id": 0, 12 | "identifier-field-ids": [2], 13 | "fields": [ 14 | {"id": 1, "name": "foo", "required": false, "type": "string"}, 15 | {"id": 2, "name": "bar", "required": true, "type": "int"}, 16 | {"id": 3, "name": "baz", "required": false, "type": "boolean"} 17 | ] 18 | }, 19 | "current-schema-id": 0, 20 | "schemas": [ 21 | { 22 | "type": "struct", 23 | "schema-id": 0, 24 | "identifier-field-ids": [2], 25 | "fields": [ 26 | {"id": 1, "name": "foo", "required": false, "type": "string"}, 27 | {"id": 2, "name": "bar", "required": true, "type": "int"}, 28 | {"id": 3, "name": "baz", "required": false, "type": "boolean"} 29 | ] 30 | } 31 | ], 32 | "partition-spec": [], 33 | "default-spec-id": 0, 34 | "last-partition-id": 999, 35 | "default-sort-order-id": 0, 36 | "sort-orders": [{"order-id": 0, "fields": []}], 37 | "properties": { 38 | "write.delete.parquet.compression-codec": "zstd", 39 | "write.metadata.compression-codec": "gzip", 40 | "write.summary.partition-limit": "100", 41 | "write.parquet.compression-codec": "zstd" 42 | }, 43 | "current-snapshot-id": -1, 44 | "refs": {}, 45 | "snapshots": [], 46 | "snapshot-log": [], 47 | "metadata-log": [] 48 | }, 49 | "config": { 50 | "client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory", 51 | "region": "us-west-2" 52 | } 53 | } 
-------------------------------------------------------------------------------- /crates/catalog/rest/testdata/load_table_response.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata-location": "s3://warehouse/database/table/metadata/00001-5f2f8166-244c-4eae-ac36-384ecdec81fc.gz.metadata.json", 3 | "metadata": { 4 | "format-version": 1, 5 | "table-uuid": "b55d9dda-6561-423a-8bfc-787980ce421f", 6 | "location": "s3://warehouse/database/table", 7 | "last-updated-ms": 1646787054459, 8 | "last-column-id": 2, 9 | "schema": { 10 | "type": "struct", 11 | "schema-id": 0, 12 | "fields": [ 13 | {"id": 1, "name": "id", "required": false, "type": "int"}, 14 | {"id": 2, "name": "data", "required": false, "type": "string"} 15 | ] 16 | }, 17 | "current-schema-id": 0, 18 | "schemas": [ 19 | { 20 | "type": "struct", 21 | "schema-id": 0, 22 | "fields": [ 23 | {"id": 1, "name": "id", "required": false, "type": "int"}, 24 | {"id": 2, "name": "data", "required": false, "type": "string"} 25 | ] 26 | } 27 | ], 28 | "partition-spec": [], 29 | "default-spec-id": 0, 30 | "partition-specs": [{"spec-id": 0, "fields": []}], 31 | "last-partition-id": 999, 32 | "default-sort-order-id": 0, 33 | "sort-orders": [{"order-id": 0, "fields": []}], 34 | "properties": {"owner": "bryan", "write.metadata.compression-codec": "gzip"}, 35 | "current-snapshot-id": 3497810964824022504, 36 | "refs": {"main": {"snapshot-id": 3497810964824022504, "type": "branch"}}, 37 | "snapshots": [ 38 | { 39 | "snapshot-id": 3497810964824022504, 40 | "timestamp-ms": 1646787054459, 41 | "summary": { 42 | "operation": "append", 43 | "spark.app.id": "local-1646787004168", 44 | "added-data-files": "1", 45 | "added-records": "1", 46 | "added-files-size": "697", 47 | "changed-partition-count": "1", 48 | "total-records": "1", 49 | "total-files-size": "697", 50 | "total-data-files": "1", 51 | "total-delete-files": "0", 52 | "total-position-deletes": "0", 53 | 
"total-equality-deletes": "0" 54 | }, 55 | "manifest-list": "s3://warehouse/database/table/metadata/snap-3497810964824022504-1-c4f68204-666b-4e50-a9df-b10c34bf6b82.avro", 56 | "schema-id": 0 57 | } 58 | ], 59 | "snapshot-log": [{"timestamp-ms": 1646787054459, "snapshot-id": 3497810964824022504}], 60 | "metadata-log": [ 61 | { 62 | "timestamp-ms": 1646787031514, 63 | "metadata-file": "s3://warehouse/database/table/metadata/00000-88484a1c-00e5-4a07-a787-c0e7aeffa805.gz.metadata.json" 64 | } 65 | ] 66 | }, 67 | "config": {"client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory", "region": "us-west-2"} 68 | } -------------------------------------------------------------------------------- /crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | networks: 19 | rest_bridge: 20 | 21 | services: 22 | rest: 23 | image: apache/iceberg-rest-fixture 24 | environment: 25 | - AWS_ACCESS_KEY_ID=admin 26 | - AWS_SECRET_ACCESS_KEY=password 27 | - AWS_REGION=us-east-1 28 | - CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog 29 | - CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory 30 | - CATALOG_WAREHOUSE=s3://icebergdata/demo 31 | - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO 32 | - CATALOG_S3_ENDPOINT=http://minio:9000 33 | depends_on: 34 | - minio 35 | networks: 36 | rest_bridge: 37 | expose: 38 | - 8181 39 | 40 | minio: 41 | image: minio/minio:RELEASE.2025-05-24T17-08-30Z 42 | environment: 43 | - MINIO_ROOT_USER=admin 44 | - MINIO_ROOT_PASSWORD=password 45 | - MINIO_DOMAIN=minio 46 | hostname: icebergdata.minio 47 | networks: 48 | rest_bridge: 49 | expose: 50 | - 9001 51 | - 9000 52 | command: ["server", "/data", "--console-address", ":9001"] 53 | 54 | mc: 55 | depends_on: 56 | - minio 57 | image: minio/mc:RELEASE.2025-05-21T01-59-54Z 58 | environment: 59 | - AWS_ACCESS_KEY_ID=admin 60 | - AWS_SECRET_ACCESS_KEY=password 61 | - AWS_REGION=us-east-1 62 | entrypoint: > 63 | /bin/sh -c " until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; /usr/bin/mc rm -r --force minio/icebergdata; /usr/bin/mc mb minio/icebergdata; /usr/bin/mc policy set public minio/icebergdata; tail -f /dev/null " 64 | networks: 65 | rest_bridge: 66 | -------------------------------------------------------------------------------- /crates/catalog/rest/testdata/update_table_response.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata-location": "s3://warehouse/database/table/metadata.json", 3 | "metadata": { 4 | "format-version": 2, 5 | "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29", 6 | "location": "s3://warehouse/database/table", 7 | "last-sequence-number" : 1, 8 | "last-updated-ms": 1657810967051, 9 | "last-column-id": 3, 10 | "current-schema-id": 0, 11 | "schemas": [ 12 | { 13 | "type": "struct", 14 | "schema-id": 0, 15 | "identifier-field-ids": [2], 16 | "fields": [ 17 | {"id": 1, "name": "foo", "required": false, "type": "string"}, 18 | {"id": 2, "name": "bar", "required": true, "type": "int"}, 19 | {"id": 3, "name": "baz", "required": false, "type": "boolean"} 20 | ] 21 | } 22 | ], 23 | "partition-specs": [], 24 | "default-spec-id": 0, 25 | "last-partition-id": 999, 26 | "default-sort-order-id": 0, 27 | "sort-orders": [{"order-id": 0, "fields": []}], 28 | "properties": { 29 | "write.delete.parquet.compression-codec": "zstd", 30 | "write.metadata.compression-codec": "gzip", 31 | "write.summary.partition-limit": "100", 32 | "write.parquet.compression-codec": "zstd" 33 | }, 34 | "current-snapshot-id": -1, 35 | "refs": {}, 36 | "snapshots": [], 37 | "snapshot-log": [], 38 | "metadata-log": [] 39 | } 40 | } -------------------------------------------------------------------------------- /crates/catalog/s3tables/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg-catalog-s3tables" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | categories = ["database"] 26 | description = "Apache Iceberg Rust S3Tables Catalog" 27 | keywords = ["iceberg", "sql", "catalog"] 28 | license = { workspace = true } 29 | repository = { workspace = true } 30 | 31 | [dependencies] 32 | anyhow = { workspace = true } 33 | async-trait = { workspace = true } 34 | aws-config = { workspace = true } 35 | aws-sdk-s3tables = "1.10.0" 36 | iceberg = { workspace = true } 37 | serde_json = { workspace = true } 38 | typed-builder = { workspace = true } 39 | uuid = { workspace = true, features = ["v4"] } 40 | 41 | [dev-dependencies] 42 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } 43 | itertools = { workspace = true } 44 | tokio = { workspace = true } 45 | -------------------------------------------------------------------------------- /crates/catalog/s3tables/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Iceberg s3tables catalog implementation. 19 | 20 | #![deny(missing_docs)] 21 | 22 | mod catalog; 23 | mod utils; 24 | 25 | pub use catalog::*; 26 | -------------------------------------------------------------------------------- /crates/catalog/sql/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg-catalog-sql" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | categories = ["database"] 26 | description = "Apache Iceberg Rust Sql Catalog" 27 | keywords = ["iceberg", "sql", "catalog"] 28 | license = { workspace = true } 29 | repository = { workspace = true } 30 | 31 | [dependencies] 32 | async-trait = { workspace = true } 33 | iceberg = { workspace = true } 34 | serde_json = { workspace = true } 35 | sqlx = { version = "0.8.1", features = ["any"], default-features = false } 36 | typed-builder = { workspace = true } 37 | uuid = { workspace = true, features = ["v4"] } 38 | 39 | [dev-dependencies] 40 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } 41 | itertools = { workspace = true } 42 | regex = "1.10.5" 43 | sqlx = { version = "0.8.1", features = [ 44 | "tls-rustls", 45 | "runtime-tokio", 46 | "any", 47 | "sqlite", 48 | "migrate", 49 | ], default-features = false } 50 | tempfile = { workspace = true } 51 | tokio = { workspace = true } 52 | -------------------------------------------------------------------------------- /crates/catalog/sql/src/error.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use iceberg::{Error, ErrorKind, NamespaceIdent, Result, TableIdent}; 19 | 20 | /// Format an sqlx error into iceberg error. 21 | pub fn from_sqlx_error(error: sqlx::Error) -> Error { 22 | Error::new( 23 | ErrorKind::Unexpected, 24 | "operation failed for hitting sqlx error".to_string(), 25 | ) 26 | .with_source(error) 27 | } 28 | 29 | pub fn no_such_namespace_err(namespace: &NamespaceIdent) -> Result { 30 | Err(Error::new( 31 | ErrorKind::Unexpected, 32 | format!("No such namespace: {:?}", namespace), 33 | )) 34 | } 35 | 36 | pub fn no_such_table_err(table_ident: &TableIdent) -> Result { 37 | Err(Error::new( 38 | ErrorKind::Unexpected, 39 | format!("No such table: {:?}", table_ident), 40 | )) 41 | } 42 | 43 | pub fn table_already_exists_err(table_ident: &TableIdent) -> Result { 44 | Err(Error::new( 45 | ErrorKind::Unexpected, 46 | format!("Table {:?} already exists.", table_ident), 47 | )) 48 | } 49 | -------------------------------------------------------------------------------- /crates/catalog/sql/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Iceberg sql catalog implementation. 19 | 20 | #![deny(missing_docs)] 21 | 22 | mod catalog; 23 | mod error; 24 | pub use catalog::*; 25 | -------------------------------------------------------------------------------- /crates/examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | license = { workspace = true } 22 | name = "iceberg-examples" 23 | repository = { workspace = true } 24 | rust-version = { workspace = true } 25 | version = { workspace = true } 26 | 27 | [dependencies] 28 | futures = { workspace = true } 29 | iceberg = { workspace = true } 30 | iceberg-catalog-rest = { workspace = true } 31 | tokio = { workspace = true, features = ["full"] } 32 | 33 | [[example]] 34 | name = "rest-catalog-namespace" 35 | path = "src/rest_catalog_namespace.rs" 36 | 37 | [[example]] 38 | name = "rest-catalog-table" 39 | path = "src/rest_catalog_table.rs" 40 | 41 | [[example]] 42 | name = "oss-backend" 43 | path = "src/oss_backend.rs" 44 | required-features = ["storage-oss"] 45 | 46 | [features] 47 | default = [] 48 | storage-oss = ["iceberg/storage-oss"] 49 | -------------------------------------------------------------------------------- /crates/examples/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | Example usage code for `iceberg-rust`. Currently, these examples can't be run directly since they require setting up 21 | environments for catalogs, for example, a REST catalog server. -------------------------------------------------------------------------------- /crates/iceberg/src/arrow/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Conversion between Iceberg and Arrow schema 19 | 20 | mod schema; 21 | pub use schema::*; 22 | 23 | mod nan_val_cnt_visitor; 24 | pub(crate) use nan_val_cnt_visitor::*; 25 | 26 | pub(crate) mod delete_file_manager; 27 | 28 | mod reader; 29 | pub(crate) mod record_batch_projector; 30 | pub(crate) mod record_batch_transformer; 31 | mod value; 32 | pub use reader::*; 33 | pub use value::*; 34 | -------------------------------------------------------------------------------- /crates/iceberg/src/avro/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Avro related codes. 
19 | mod schema; 20 | pub(crate) use schema::*; 21 | -------------------------------------------------------------------------------- /crates/iceberg/src/cache.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Cache management for Iceberg. 19 | 20 | use std::sync::Arc; 21 | 22 | use crate::spec::{Manifest, ManifestList}; 23 | 24 | /// A trait for caching in-memory objects of given type. 25 | /// 26 | /// # Notes 27 | /// 28 | /// ObjectCache will store deeply nested objects, such as `Manifest`, 29 | /// which contains `Schema`. Please ensure that the cache stores the 30 | /// object in memory as-is, without attempting to serialize it, as 31 | /// serialization could be extremely expensive. 32 | pub trait ObjectCache: Send + Sync { 33 | /// Gets an object from the cache by its key. 34 | fn get(&self, key: &K) -> Option; 35 | /// Sets an object in the cache with the given key and value. 36 | fn set(&self, key: K, value: V); 37 | } 38 | 39 | /// A trait for caching different in-memory objects used by iceberg. 
40 | /// 41 | /// # Notes 42 | /// 43 | /// ObjectCache will store deeply nested objects, such as `Manifest`, 44 | /// which contains `Schema`. Please ensure that the cache stores the 45 | /// object in memory as-is, without attempting to serialize it, as 46 | /// serialization could be extremely expensive. 47 | pub trait ObjectCacheProvide: Send + Sync { 48 | /// Gets a cache for manifests. 49 | fn manifest_cache(&self) -> &dyn ObjectCache>; 50 | /// Gets a cache for manifest lists. 51 | fn manifest_list_cache(&self) -> &dyn ObjectCache>; 52 | } 53 | 54 | /// `ObjectCacheProvider` is a type alias for a thread-safe reference-counted pointer to an `ObjectCacheProvide` trait object. 55 | pub type ObjectCacheProvider = Arc; 56 | 57 | #[cfg(test)] 58 | mod tests { 59 | use super::*; 60 | 61 | struct _TestDynCompatibleForObjectCache(Arc>>); 62 | struct _TestDynCompatibleForObjectCacheProvider(ObjectCacheProvider); 63 | } 64 | -------------------------------------------------------------------------------- /crates/iceberg/src/expr/visitors/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | pub(crate) mod bound_predicate_visitor; 19 | pub(crate) mod expression_evaluator; 20 | pub(crate) mod inclusive_metrics_evaluator; 21 | pub(crate) mod inclusive_projection; 22 | pub(crate) mod manifest_evaluator; 23 | pub(crate) mod page_index_evaluator; 24 | pub(crate) mod row_group_metrics_evaluator; 25 | pub(crate) mod strict_metrics_evaluator; 26 | pub(crate) mod strict_projection; 27 | -------------------------------------------------------------------------------- /crates/iceberg/src/inspect/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Metadata table APIs. 
19 | 20 | mod manifests; 21 | mod metadata_table; 22 | mod snapshots; 23 | 24 | pub use manifests::ManifestsTable; 25 | pub use metadata_table::*; 26 | pub use snapshots::SnapshotsTable; 27 | -------------------------------------------------------------------------------- /crates/iceberg/src/io/storage_fs.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use opendal::Operator; 19 | use opendal::services::FsConfig; 20 | 21 | use crate::Result; 22 | 23 | /// Build a new opendal `Fs` operator with its root set to `/`. 24 | pub(crate) fn fs_config_build() -> Result { 25 | let mut cfg = FsConfig::default(); 26 | cfg.root = Some("/".to_string()); 27 | 28 | Ok(Operator::from_config(cfg)?.finish()) 29 | } 30 | -------------------------------------------------------------------------------- /crates/iceberg/src/io/storage_memory.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use opendal::Operator; 19 | use opendal::services::MemoryConfig; 20 | 21 | use crate::Result; 22 | 23 | pub(crate) fn memory_config_build() -> Result { 24 | Ok(Operator::from_config(MemoryConfig::default())?.finish()) 25 | } 26 | -------------------------------------------------------------------------------- /crates/iceberg/src/io/storage_oss.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::collections::HashMap; 19 | 20 | use opendal::services::OssConfig; 21 | use opendal::{Configurator, Operator}; 22 | use url::Url; 23 | 24 | use crate::{Error, ErrorKind, Result}; 25 | 26 | /// Required configuration arguments for creating an Aliyun OSS Operator with OpenDAL: 27 | /// - `oss.endpoint`: The OSS service endpoint URL 28 | /// - `oss.access-key-id`: The access key ID for authentication 29 | /// - `oss.access-key-secret`: The access key secret for authentication 30 | /// Aliyun oss endpoint. 31 | pub const OSS_ENDPOINT: &str = "oss.endpoint"; 32 | /// Aliyun oss access key id. 33 | pub const OSS_ACCESS_KEY_ID: &str = "oss.access-key-id"; 34 | /// Aliyun oss access key secret. 35 | pub const OSS_ACCESS_KEY_SECRET: &str = "oss.access-key-secret"; 36 | 37 | /// Parse iceberg props to oss config. 38 | pub(crate) fn oss_config_parse(mut m: HashMap) -> Result { 39 | let mut cfg: OssConfig = OssConfig::default(); 40 | if let Some(endpoint) = m.remove(OSS_ENDPOINT) { 41 | cfg.endpoint = Some(endpoint); 42 | }; 43 | if let Some(access_key_id) = m.remove(OSS_ACCESS_KEY_ID) { 44 | cfg.access_key_id = Some(access_key_id); 45 | }; 46 | if let Some(access_key_secret) = m.remove(OSS_ACCESS_KEY_SECRET) { 47 | cfg.access_key_secret = Some(access_key_secret); 48 | }; 49 | 50 | Ok(cfg) 51 | } 52 | 53 | /// Build a new opendal operator from the given path. 
54 | pub(crate) fn oss_config_build(cfg: &OssConfig, path: &str) -> Result { 55 | let url = Url::parse(path)?; 56 | let bucket = url.host_str().ok_or_else(|| { 57 | Error::new( 58 | ErrorKind::DataInvalid, 59 | format!("Invalid oss url: {}, missing bucket", path), 60 | ) 61 | })?; 62 | 63 | let builder = cfg.clone().into_builder().bucket(bucket); 64 | 65 | Ok(Operator::new(builder)?.finish()) 66 | } 67 | -------------------------------------------------------------------------------- /crates/iceberg/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Apache Iceberg Official Native Rust Implementation 19 | //! 20 | //! # Examples 21 | //! 22 | //! ## Scan A Table 23 | //! 24 | //! ```rust, ignore 25 | //! // This example uses `iceberg_catalog_memory`, which isn't enabled by default. 26 | //! // To run this, add `iceberg-catalog-memory` as a dependency in your Cargo.toml. 27 | //! use futures::TryStreamExt; 28 | //! use iceberg::io::{FileIO, FileIOBuilder}; 29 | //! use iceberg::{Catalog, Result, TableIdent}; 30 | //! 
use iceberg_catalog_memory::MemoryCatalog; 31 | //! 32 | //! #[tokio::main] 33 | //! async fn main() -> Result<()> { 34 | //! // Build your file IO. 35 | //! let file_io = FileIOBuilder::new("memory").build()?; 36 | //! // Connect to a catalog. 37 | //! let catalog = MemoryCatalog::new(file_io, None); 38 | //! // Load table from catalog. 39 | //! let table = catalog 40 | //! .load_table(&TableIdent::from_strs(["hello", "world"])?) 41 | //! .await?; 42 | //! // Build table scan. 43 | //! let stream = table 44 | //! .scan() 45 | //! .select(["name", "id"]) 46 | //! .build()? 47 | //! .to_arrow() 48 | //! .await?; 49 | //! 50 | //! // Consume this stream like arrow record batch stream. 51 | //! let _data: Vec<_> = stream.try_collect().await?; 52 | //! Ok(()) 53 | //! } 54 | //! ``` 55 | 56 | #![deny(missing_docs)] 57 | 58 | #[macro_use] 59 | extern crate derive_builder; 60 | extern crate core; 61 | 62 | mod error; 63 | pub use error::{Error, ErrorKind, Result}; 64 | 65 | mod catalog; 66 | 67 | pub use catalog::*; 68 | 69 | pub mod table; 70 | 71 | mod avro; 72 | pub mod cache; 73 | pub mod io; 74 | pub mod spec; 75 | 76 | pub mod inspect; 77 | pub mod scan; 78 | 79 | pub mod expr; 80 | pub mod transaction; 81 | pub mod transform; 82 | 83 | mod runtime; 84 | 85 | pub mod arrow; 86 | pub(crate) mod delete_file_index; 87 | mod utils; 88 | pub mod writer; 89 | 90 | mod delete_vector; 91 | pub mod puffin; 92 | -------------------------------------------------------------------------------- /crates/iceberg/src/puffin/blob.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::collections::HashMap; 19 | 20 | use typed_builder::TypedBuilder; 21 | 22 | /// A serialized form of a "compact" Theta sketch produced by the Apache DataSketches library. 23 | pub const APACHE_DATASKETCHES_THETA_V1: &str = "apache-datasketches-theta-v1"; 24 | /// A serialized form of a deletion vector. 25 | pub const DELETION_VECTOR_V1: &str = "deletion-vector-v1"; 26 | 27 | /// The blob 28 | #[derive(Debug, PartialEq, Clone, TypedBuilder)] 29 | pub struct Blob { 30 | pub(crate) r#type: String, 31 | pub(crate) fields: Vec, 32 | pub(crate) snapshot_id: i64, 33 | pub(crate) sequence_number: i64, 34 | pub(crate) data: Vec, 35 | pub(crate) properties: HashMap, 36 | } 37 | 38 | impl Blob { 39 | #[inline] 40 | /// See blob types: https://iceberg.apache.org/puffin-spec/#blob-types 41 | pub fn blob_type(&self) -> &str { 42 | &self.r#type 43 | } 44 | 45 | #[inline] 46 | /// List of field IDs the blob was computed for; the order of items is used to compute sketches stored in the blob. 
47 | pub fn fields(&self) -> &[i32] { 48 | &self.fields 49 | } 50 | 51 | #[inline] 52 | /// ID of the Iceberg table's snapshot the blob was computed from 53 | pub fn snapshot_id(&self) -> i64 { 54 | self.snapshot_id 55 | } 56 | 57 | #[inline] 58 | /// Sequence number of the Iceberg table's snapshot the blob was computed from 59 | pub fn sequence_number(&self) -> i64 { 60 | self.sequence_number 61 | } 62 | 63 | #[inline] 64 | /// The uncompressed blob data 65 | pub fn data(&self) -> &[u8] { 66 | &self.data 67 | } 68 | 69 | #[inline] 70 | /// Arbitrary meta-information about the blob 71 | pub fn properties(&self) -> &HashMap { 72 | &self.properties 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /crates/iceberg/src/puffin/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Iceberg Puffin implementation. 
19 | 20 | #![deny(missing_docs)] 21 | 22 | mod blob; 23 | pub use blob::{APACHE_DATASKETCHES_THETA_V1, Blob, DELETION_VECTOR_V1}; 24 | 25 | mod compression; 26 | pub use compression::CompressionCodec; 27 | 28 | mod metadata; 29 | pub use metadata::{BlobMetadata, CREATED_BY_PROPERTY, FileMetadata}; 30 | 31 | mod reader; 32 | pub use reader::PuffinReader; 33 | 34 | mod writer; 35 | pub use writer::PuffinWriter; 36 | 37 | #[cfg(test)] 38 | mod test_utils; 39 | -------------------------------------------------------------------------------- /crates/iceberg/src/spec/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Spec for Iceberg. 
19 | 20 | mod datatypes; 21 | mod encrypted_key; 22 | mod manifest; 23 | mod manifest_list; 24 | mod name_mapping; 25 | mod partition; 26 | mod schema; 27 | mod snapshot; 28 | mod snapshot_summary; 29 | mod sort; 30 | mod statistic_file; 31 | mod table_metadata; 32 | mod table_metadata_builder; 33 | mod transform; 34 | mod values; 35 | mod view_metadata; 36 | mod view_metadata_builder; 37 | mod view_version; 38 | 39 | pub use datatypes::*; 40 | pub use encrypted_key::*; 41 | pub use manifest::*; 42 | pub use manifest_list::*; 43 | pub use name_mapping::*; 44 | pub use partition::*; 45 | pub use schema::*; 46 | pub use snapshot::*; 47 | pub use snapshot_summary::*; 48 | pub use sort::*; 49 | pub use statistic_file::*; 50 | pub use table_metadata::*; 51 | pub use transform::*; 52 | pub use values::*; 53 | pub use view_metadata::*; 54 | pub use view_version::*; 55 | -------------------------------------------------------------------------------- /crates/iceberg/src/spec/schema/utils.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::collections::HashMap; 19 | 20 | use crate::{Error, ErrorKind, Result}; 21 | 22 | pub fn try_insert_field(map: &mut HashMap, field_id: i32, value: V) -> Result<()> { 23 | map.insert(field_id, value).map_or_else( 24 | || Ok(()), 25 | |_| { 26 | Err(Error::new( 27 | ErrorKind::DataInvalid, 28 | format!( 29 | "Found duplicate 'field.id' {}. Field ids must be unique.", 30 | field_id 31 | ), 32 | )) 33 | }, 34 | ) 35 | } 36 | -------------------------------------------------------------------------------- /crates/iceberg/src/utils.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::num::NonZeroUsize; 19 | 20 | // Use a default value of 1 as the safest option. 21 | // See https://doc.rust-lang.org/std/thread/fn.available_parallelism.html#limitations 22 | // for more details. 23 | const DEFAULT_PARALLELISM: usize = 1; 24 | 25 | /// Uses [`std::thread::available_parallelism`] in order to 26 | /// retrieve an estimate of the default amount of parallelism 27 | /// that should be used. 
Note that [`std::thread::available_parallelism`] 28 | /// returns a `Result` as it can fail, so here we use 29 | /// a default value instead. 30 | /// Note: we don't use a OnceCell or LazyCell here as there 31 | /// are circumstances where the level of available 32 | /// parallelism can change during the lifetime of an executing 33 | /// process, but this should not be called in a hot loop. 34 | pub(crate) fn available_parallelism() -> NonZeroUsize { 35 | std::thread::available_parallelism().unwrap_or_else(|_err| { 36 | // Failed to get the level of parallelism. 37 | // TODO: log/trace when this fallback occurs. 38 | 39 | // Using a default value. 40 | NonZeroUsize::new(DEFAULT_PARALLELISM).unwrap() 41 | }) 42 | } 43 | -------------------------------------------------------------------------------- /crates/iceberg/src/writer/base_writer/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Base writer module contains the basic writer provide by iceberg: `DataFileWriter`, `PositionDeleteFileWriter`, `EqualityDeleteFileWriter`. 
/// File writer builder trait.
///
/// A builder is consumed by [`FileWriterBuilder::build`] (it takes `self` by
/// value) and must be `Send + Clone + 'static`.
//
// NOTE(review): the generic arguments in this copy look truncated (e.g. the
// return type reads `impl Future> + Send`); presumably the future resolves to
// a `Result` wrapping the writer type `R` — confirm against the upstream file.
pub trait FileWriterBuilder: Send + Clone + 'static {
    /// The associated file writer type.
    type R: FileWriter;
    /// Build file writer.
    ///
    /// Consumes the builder and returns a `Send` future.
    fn build(self) -> impl Future> + Send;
}
/// A file writer writes record batches to a specific physical file format
/// (such as Parquet or ORC).
///
/// Implementors must also be `Send`, implement `CurrentFileStatus`, and be
/// `'static`.
//
// NOTE(review): the return types below read `impl Future> + Send` in this
// copy; the `Future<Output = ...>` arguments appear to have been lost —
// confirm the exact output types against the upstream file.
pub trait FileWriter: Send + CurrentFileStatus + 'static {
    /// Write record batch to file.
    ///
    /// Borrows the writer mutably and returns a `Send` future.
    fn write(&mut self, batch: &RecordBatch) -> impl Future> + Send;
    /// Close file writer.
    ///
    /// Consumes the writer, so it cannot be used after closing.
    fn close(self) -> impl Future> + Send;
}
27 | pub(crate) struct TrackWriter { 28 | inner: Box, 29 | written_size: Arc, 30 | } 31 | 32 | impl TrackWriter { 33 | pub fn new(writer: Box, written_size: Arc) -> Self { 34 | Self { 35 | inner: writer, 36 | written_size, 37 | } 38 | } 39 | } 40 | 41 | #[async_trait::async_trait] 42 | impl FileWrite for TrackWriter { 43 | async fn write(&mut self, bs: Bytes) -> Result<()> { 44 | let size = bs.len(); 45 | self.inner.write(bs).await.inspect(|_| { 46 | self.written_size 47 | .fetch_add(size as i64, std::sync::atomic::Ordering::Relaxed); 48 | }) 49 | } 50 | 51 | async fn close(&mut self) -> Result<()> { 52 | self.inner.close().await 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /crates/iceberg/testdata/example_empty_table_metadata_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "{{ table_location }}", 5 | "last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "current-schema-id": 1, 9 | "schemas": [ 10 | { 11 | "type": "struct", 12 | "schema-id": 0, 13 | "fields": [ 14 | {"id": 1, "name": "x", "required": true, "type": "long"} 15 | ]}, 16 | { 17 | "type": "struct", 18 | "schema-id": 1, 19 | "identifier-field-ids": [1, 2], 20 | "fields": [ 21 | {"id": 1, "name": "x", "required": true, "type": "long"}, 22 | {"id": 2, "name": "y", "required": true, "type": "long", "doc": "comment"}, 23 | {"id": 3, "name": "z", "required": true, "type": "long"}, 24 | {"id": 4, "name": "a", "required": true, "type": "string"}, 25 | {"id": 5, "name": "dbl", "required": true, "type": "double"}, 26 | {"id": 6, "name": "i32", "required": true, "type": "int"}, 27 | {"id": 7, "name": "i64", "required": true, "type": "long"}, 28 | {"id": 8, "name": "bool", "required": true, "type": "boolean"} 29 | ] 30 | } 31 | ], 32 | "default-spec-id": 0, 33 | 
"partition-specs": [ 34 | { 35 | "spec-id": 0, 36 | "fields": [ 37 | {"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000} 38 | ] 39 | } 40 | ], 41 | "last-partition-id": 1000, 42 | "default-sort-order-id": 3, 43 | "sort-orders": [ 44 | { 45 | "order-id": 3, 46 | "fields": [ 47 | {"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"}, 48 | {"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"} 49 | ] 50 | } 51 | ], 52 | "properties": {"read.split.target.size": "134217728"}, 53 | "snapshots": [], 54 | "snapshot-log": [], 55 | "metadata-log": [{"metadata-file": "{{ table_metadata_1_location }}", "timestamp-ms": 1515100}], 56 | "refs": {} 57 | } 58 | -------------------------------------------------------------------------------- /crates/iceberg/testdata/example_table_metadata_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "{{ table_location }}", 5 | "last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "current-schema-id": 1, 9 | "schemas": [ 10 | { 11 | "type": "struct", 12 | "schema-id": 0, 13 | "fields": [ 14 | {"id": 1, "name": "x", "required": true, "type": "long"} 15 | ]}, 16 | { 17 | "type": "struct", 18 | "schema-id": 1, 19 | "identifier-field-ids": [1, 2], 20 | "fields": [ 21 | {"id": 1, "name": "x", "required": true, "type": "long"}, 22 | {"id": 2, "name": "y", "required": true, "type": "long", "doc": "comment"}, 23 | {"id": 3, "name": "z", "required": true, "type": "long"}, 24 | {"id": 4, "name": "a", "required": true, "type": "string"}, 25 | {"id": 5, "name": "dbl", "required": true, "type": "double"}, 26 | {"id": 6, "name": "i32", "required": true, "type": "int"}, 27 | {"id": 7, "name": "i64", "required": true, "type": "long"}, 28 | {"id": 8, "name": "bool", "required": true, 
"type": "boolean"} 29 | ] 30 | } 31 | ], 32 | "default-spec-id": 0, 33 | "partition-specs": [ 34 | { 35 | "spec-id": 0, 36 | "fields": [ 37 | {"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000} 38 | ] 39 | } 40 | ], 41 | "last-partition-id": 1000, 42 | "default-sort-order-id": 3, 43 | "sort-orders": [ 44 | { 45 | "order-id": 3, 46 | "fields": [ 47 | {"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"}, 48 | {"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"} 49 | ] 50 | } 51 | ], 52 | "properties": {"read.split.target.size": "134217728"}, 53 | "current-snapshot-id": 3055729675574597004, 54 | "snapshots": [ 55 | { 56 | "snapshot-id": 3051729675574597004, 57 | "timestamp-ms": 1515100955770, 58 | "sequence-number": 0, 59 | "summary": {"operation": "append"}, 60 | "manifest-list": "{{ manifest_list_1_location }}" 61 | }, 62 | { 63 | "snapshot-id": 3055729675574597004, 64 | "parent-snapshot-id": 3051729675574597004, 65 | "timestamp-ms": 1555100955770, 66 | "sequence-number": 1, 67 | "summary": {"operation": "append"}, 68 | "manifest-list": "{{ manifest_list_2_location }}", 69 | "schema-id": 1 70 | } 71 | ], 72 | "snapshot-log": [ 73 | {"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770}, 74 | {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770} 75 | ], 76 | "metadata-log": [{"metadata-file": "{{ table_metadata_1_location }}", "timestamp-ms": 1515100}], 77 | "refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}} 78 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/file_io_gcs/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | services: 19 | gcs-server: 20 | image: fsouza/fake-gcs-server@sha256:36b0116fae5236e8def76ccb07761a9ca323e476f366a5f4bf449cac19deaf2d 21 | expose: 22 | - 4443 23 | command: --scheme http 24 | -------------------------------------------------------------------------------- /crates/iceberg/testdata/file_io_s3/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | services: 19 | minio: 20 | image: minio/minio:RELEASE.2024-02-26T09-33-48Z 21 | expose: 22 | - 9000 23 | - 9001 24 | environment: 25 | MINIO_ROOT_USER: 'admin' 26 | MINIO_ROOT_PASSWORD: 'password' 27 | MINIO_ADDRESS: ':9000' 28 | MINIO_CONSOLE_ADDRESS: ':9001' 29 | entrypoint: sh 30 | command: -c 'mkdir -p /data/bucket1 && /usr/bin/minio server /data' 31 | -------------------------------------------------------------------------------- /crates/iceberg/testdata/manifests_lists/manifest-list-v2-1.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/iceberg-rust/bddffa174dcafb7b3adc9d868edfb645a30f41aa/crates/iceberg/testdata/manifests_lists/manifest-list-v2-1.avro -------------------------------------------------------------------------------- /crates/iceberg/testdata/manifests_lists/manifest-list-v2-2.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/iceberg-rust/bddffa174dcafb7b3adc9d868edfb645a30f41aa/crates/iceberg/testdata/manifests_lists/manifest-list-v2-2.avro -------------------------------------------------------------------------------- /crates/iceberg/testdata/puffin/java-generated/empty-puffin-uncompressed.bin: -------------------------------------------------------------------------------- 1 | PFA1PFA1{"blobs":[]} PFA1 -------------------------------------------------------------------------------- /crates/iceberg/testdata/puffin/java-generated/sample-metric-data-compressed-zstd.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/iceberg-rust/bddffa174dcafb7b3adc9d868edfb645a30f41aa/crates/iceberg/testdata/puffin/java-generated/sample-metric-data-compressed-zstd.bin 
-------------------------------------------------------------------------------- /crates/iceberg/testdata/puffin/java-generated/sample-metric-data-uncompressed.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/iceberg-rust/bddffa174dcafb7b3adc9d868edfb645a30f41aa/crates/iceberg/testdata/puffin/java-generated/sample-metric-data-uncompressed.bin -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataUnsupportedVersion.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 3, 3 | "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", 4 | "location": "s3://bucket/test/location", 5 | "last-updated-ms": 1602638573874, 6 | "last-sequence-number": 0, 7 | "last-column-id": 3, 8 | "schema": { 9 | "type": "struct", 10 | "fields": [ 11 | { 12 | "id": 1, 13 | "name": "x", 14 | "required": true, 15 | "type": "long" 16 | }, 17 | { 18 | "id": 2, 19 | "name": "y", 20 | "required": true, 21 | "type": "long", 22 | "doc": "comment" 23 | }, 24 | { 25 | "id": 3, 26 | "name": "z", 27 | "required": true, 28 | "type": "long" 29 | } 30 | ] 31 | }, 32 | "partition-spec": [], 33 | "properties": {}, 34 | "current-snapshot-id": -1, 35 | "snapshots": [] 36 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV1NoValidSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 1, 3 | "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", 4 | "location": "s3://bucket/test/location", 5 | "last-updated-ms": 1602638573874, 6 | "last-column-id": 3, 7 | "schemas": [ 8 | { 9 | "fields": [ 10 | { 11 | "id": 1, 12 | "name": "x", 13 | "required": true, 14 | "type": "long" 15 | }, 16 | { 17 | "id": 2, 18 | "name": "y", 19 | "required": true, 20 | 
"type": "long", 21 | "doc": "comment" 22 | }, 23 | { 24 | "id": 3, 25 | "name": "z", 26 | "required": true, 27 | "type": "long" 28 | } 29 | ], 30 | "schema-id": 0, 31 | "type": "struct" 32 | } 33 | ], 34 | "partition-spec": [ 35 | { 36 | "name": "x", 37 | "transform": "identity", 38 | "source-id": 1, 39 | "field-id": 1000 40 | } 41 | ], 42 | "properties": {}, 43 | "current-snapshot-id": -1, 44 | "snapshots": [] 45 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV1PartitionSpecsWithoutDefaultId.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 1, 3 | "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", 4 | "location": "s3://bucket/test/location", 5 | "last-updated-ms": 1602638573874, 6 | "last-column-id": 3, 7 | "schema": { 8 | "type": "struct", 9 | "fields": [ 10 | { 11 | "id": 1, 12 | "name": "x", 13 | "required": true, 14 | "type": "long" 15 | }, 16 | { 17 | "id": 2, 18 | "name": "y", 19 | "required": true, 20 | "type": "long", 21 | "doc": "comment" 22 | }, 23 | { 24 | "id": 3, 25 | "name": "z", 26 | "required": true, 27 | "type": "long" 28 | } 29 | ] 30 | }, 31 | "partition-specs": [ 32 | { 33 | "spec-id": 1, 34 | "fields": [ 35 | { 36 | "name": "x", 37 | "transform": "identity", 38 | "source-id": 1, 39 | "field-id": 1000 40 | } 41 | ] 42 | }, 43 | { 44 | "spec-id": 2, 45 | "fields": [ 46 | { 47 | "name": "y", 48 | "transform": "identity", 49 | "source-id": 2, 50 | "field-id": 1001 51 | } 52 | ] 53 | } 54 | ], 55 | "properties": {}, 56 | "current-snapshot-id": -1, 57 | "snapshots": [] 58 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV1SchemasWithoutCurrentId.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 1, 3 | "table-uuid": 
"d20125c8-7284-442c-9aea-15fee620737c", 4 | "location": "s3://bucket/test/location", 5 | "last-updated-ms": 1602638573874, 6 | "last-column-id": 3, 7 | "schemas": [ 8 | { 9 | "fields": [ 10 | { 11 | "id": 1, 12 | "name": "x", 13 | "required": true, 14 | "type": "long" 15 | }, 16 | { 17 | "id": 2, 18 | "name": "y", 19 | "required": true, 20 | "type": "long", 21 | "doc": "comment" 22 | }, 23 | { 24 | "id": 3, 25 | "name": "z", 26 | "required": true, 27 | "type": "long" 28 | } 29 | ], 30 | "schema-id": 0, 31 | "type": "struct" 32 | } 33 | ], 34 | "schema": { 35 | "type": "struct", 36 | "fields": [ 37 | { 38 | "id": 1, 39 | "name": "x", 40 | "required": true, 41 | "type": "long" 42 | }, 43 | { 44 | "id": 2, 45 | "name": "y", 46 | "required": true, 47 | "type": "long", 48 | "doc": "comment" 49 | }, 50 | { 51 | "id": 3, 52 | "name": "z", 53 | "required": true, 54 | "type": "long" 55 | } 56 | ] 57 | }, 58 | "partition-spec": [ 59 | { 60 | "name": "x", 61 | "transform": "identity", 62 | "source-id": 1, 63 | "field-id": 1000 64 | } 65 | ], 66 | "properties": {}, 67 | "current-snapshot-id": -1, 68 | "snapshots": [] 69 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV1Valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 1, 3 | "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", 4 | "location": "s3://bucket/test/location", 5 | "last-updated-ms": 1602638573874, 6 | "last-column-id": 3, 7 | "schema": { 8 | "type": "struct", 9 | "fields": [ 10 | { 11 | "id": 1, 12 | "name": "x", 13 | "required": true, 14 | "type": "long" 15 | }, 16 | { 17 | "id": 2, 18 | "name": "y", 19 | "required": true, 20 | "type": "long", 21 | "doc": "comment" 22 | }, 23 | { 24 | "id": 3, 25 | "name": "z", 26 | "required": true, 27 | "type": "long" 28 | } 29 | ] 30 | }, 31 | "partition-spec": [ 32 | { 33 | "name": "x", 34 | "transform": "identity", 35 
| "source-id": 1, 36 | "field-id": 1000 37 | } 38 | ], 39 | "properties": {}, 40 | "current-snapshot-id": -1, 41 | "snapshots": [] 42 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV2CurrentSchemaNotFound.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "s3://bucket/test/location", 5 | "last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "current-schema-id": 2, 9 | "schemas": [ 10 | { 11 | "type": "struct", 12 | "schema-id": 0, 13 | "fields": [ 14 | { 15 | "id": 1, 16 | "name": "x", 17 | "required": true, 18 | "type": "long" 19 | } 20 | ] 21 | }, 22 | { 23 | "type": "struct", 24 | "schema-id": 1, 25 | "fields": [ 26 | { 27 | "id": 1, 28 | "name": "x", 29 | "required": true, 30 | "type": "long" 31 | }, 32 | { 33 | "id": 2, 34 | "name": "y", 35 | "required": true, 36 | "type": "long", 37 | "doc": "comment" 38 | }, 39 | { 40 | "id": 3, 41 | "name": "z", 42 | "required": true, 43 | "type": "long" 44 | } 45 | ] 46 | } 47 | ], 48 | "default-spec-id": 0, 49 | "partition-specs": [ 50 | { 51 | "spec-id": 0, 52 | "fields": [ 53 | { 54 | "name": "x", 55 | "transform": "identity", 56 | "source-id": 1, 57 | "field-id": 1000 58 | } 59 | ] 60 | } 61 | ], 62 | "last-partition-id": 1000, 63 | "default-sort-order-id": 3, 64 | "sort-orders": [ 65 | { 66 | "order-id": 3, 67 | "fields": [ 68 | { 69 | "transform": "identity", 70 | "source-id": 2, 71 | "direction": "asc", 72 | "null-order": "nulls-first" 73 | }, 74 | { 75 | "transform": "bucket[4]", 76 | "source-id": 3, 77 | "direction": "desc", 78 | "null-order": "nulls-last" 79 | } 80 | ] 81 | } 82 | ], 83 | "properties": {}, 84 | "current-snapshot-id": -1, 85 | "snapshots": [], 86 | "snapshot-log": [], 87 | "metadata-log": [] 88 | } 
-------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV2MissingLastPartitionId.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "s3://bucket/test/location", 5 | "last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "current-schema-id": 0, 9 | "schemas": [{ 10 | "type": "struct", 11 | "schema-id": 0, 12 | "fields": [ 13 | { 14 | "id": 1, 15 | "name": "x", 16 | "required": true, 17 | "type": "long" 18 | }, 19 | { 20 | "id": 2, 21 | "name": "y", 22 | "required": true, 23 | "type": "long", 24 | "doc": "comment" 25 | }, 26 | { 27 | "id": 3, 28 | "name": "z", 29 | "required": true, 30 | "type": "long" 31 | } 32 | ] 33 | }], 34 | "default-spec-id": 0, 35 | "partition-specs": [ 36 | { 37 | "spec-id": 0, 38 | "fields": [ 39 | { 40 | "name": "x", 41 | "transform": "identity", 42 | "source-id": 1, 43 | "field-id": 1000 44 | } 45 | ] 46 | } 47 | ], 48 | "default-sort-order-id": 3, 49 | "sort-orders": [ 50 | { 51 | "order-id": 3, 52 | "fields": [ 53 | { 54 | "transform": "identity", 55 | "source-id": 2, 56 | "direction": "asc", 57 | "null-order": "nulls-first" 58 | }, 59 | { 60 | "transform": "bucket[4]", 61 | "source-id": 3, 62 | "direction": "desc", 63 | "null-order": "nulls-last" 64 | } 65 | ] 66 | } 67 | ], 68 | "properties": {}, 69 | "current-snapshot-id": -1, 70 | "snapshots": [], 71 | "snapshot-log": [], 72 | "metadata-log": [] 73 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV2MissingPartitionSpecs.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "s3://bucket/test/location", 5 | 
"last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "current-schema-id": 0, 9 | "schemas": [{ 10 | "type": "struct", 11 | "schema-id": 0, 12 | "fields": [ 13 | { 14 | "id": 1, 15 | "name": "x", 16 | "required": true, 17 | "type": "long" 18 | }, 19 | { 20 | "id": 2, 21 | "name": "y", 22 | "required": true, 23 | "type": "long", 24 | "doc": "comment" 25 | }, 26 | { 27 | "id": 3, 28 | "name": "z", 29 | "required": true, 30 | "type": "long" 31 | } 32 | ] 33 | }], 34 | "partition-spec": [ 35 | { 36 | "name": "x", 37 | "transform": "identity", 38 | "source-id": 1, 39 | "field-id": 1000 40 | } 41 | ], 42 | "default-sort-order-id": 3, 43 | "sort-orders": [ 44 | { 45 | "order-id": 3, 46 | "fields": [ 47 | { 48 | "transform": "identity", 49 | "source-id": 2, 50 | "direction": "asc", 51 | "null-order": "nulls-first" 52 | }, 53 | { 54 | "transform": "bucket[4]", 55 | "source-id": 3, 56 | "direction": "desc", 57 | "null-order": "nulls-last" 58 | } 59 | ] 60 | } 61 | ], 62 | "properties": {}, 63 | "current-snapshot-id": -1, 64 | "snapshots": [], 65 | "snapshot-log": [], 66 | "metadata-log": [] 67 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSchemas.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "s3://bucket/test/location", 5 | "last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "schema": { 9 | "type": "struct", 10 | "fields": [ 11 | { 12 | "id": 1, 13 | "name": "x", 14 | "required": true, 15 | "type": "long" 16 | }, 17 | { 18 | "id": 2, 19 | "name": "y", 20 | "required": true, 21 | "type": "long", 22 | "doc": "comment" 23 | }, 24 | { 25 | "id": 3, 26 | "name": "z", 27 | "required": true, 28 | "type": "long" 29 | } 30 | ] 31 | }, 32 | "default-spec-id": 
0, 33 | "partition-specs": [ 34 | { 35 | "spec-id": 0, 36 | "fields": [ 37 | { 38 | "name": "x", 39 | "transform": "identity", 40 | "source-id": 1, 41 | "field-id": 1000 42 | } 43 | ] 44 | } 45 | ], 46 | "default-sort-order-id": 3, 47 | "sort-orders": [ 48 | { 49 | "order-id": 3, 50 | "fields": [ 51 | { 52 | "transform": "identity", 53 | "source-id": 2, 54 | "direction": "asc", 55 | "null-order": "nulls-first" 56 | }, 57 | { 58 | "transform": "bucket[4]", 59 | "source-id": 3, 60 | "direction": "desc", 61 | "null-order": "nulls-last" 62 | } 63 | ] 64 | } 65 | ], 66 | "properties": {}, 67 | "current-snapshot-id": -1, 68 | "snapshots": [], 69 | "snapshot-log": [], 70 | "metadata-log": [] 71 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV2MissingSortOrder.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "s3://bucket/test/location", 5 | "last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "current-schema-id": 0, 9 | "schemas": [{ 10 | "type": "struct", 11 | "schema-id": 0, 12 | "fields": [ 13 | { 14 | "id": 1, 15 | "name": "x", 16 | "required": true, 17 | "type": "long" 18 | }, 19 | { 20 | "id": 2, 21 | "name": "y", 22 | "required": true, 23 | "type": "long", 24 | "doc": "comment" 25 | }, 26 | { 27 | "id": 3, 28 | "name": "z", 29 | "required": true, 30 | "type": "long" 31 | } 32 | ] 33 | }], 34 | "default-spec-id": 0, 35 | "partition-specs": [ 36 | { 37 | "spec-id": 0, 38 | "fields": [ 39 | { 40 | "name": "x", 41 | "transform": "identity", 42 | "source-id": 1, 43 | "field-id": 1000 44 | } 45 | ] 46 | } 47 | ], 48 | "last-partition-id": 1000, 49 | "properties": {}, 50 | "current-snapshot-id": -1, 51 | "snapshots": [], 52 | "snapshot-log": [], 53 | "metadata-log": [] 54 | } 
-------------------------------------------------------------------------------- /crates/iceberg/testdata/table_metadata/TableMetadataV2ValidMinimal.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version": 2, 3 | "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", 4 | "location": "s3://bucket/test/location", 5 | "last-sequence-number": 34, 6 | "last-updated-ms": 1602638573590, 7 | "last-column-id": 3, 8 | "current-schema-id": 0, 9 | "schemas": [ 10 | { 11 | "type": "struct", 12 | "schema-id": 0, 13 | "fields": [ 14 | { 15 | "id": 1, 16 | "name": "x", 17 | "required": true, 18 | "type": "long" 19 | }, 20 | { 21 | "id": 2, 22 | "name": "y", 23 | "required": true, 24 | "type": "long", 25 | "doc": "comment" 26 | }, 27 | { 28 | "id": 3, 29 | "name": "z", 30 | "required": true, 31 | "type": "long" 32 | } 33 | ] 34 | } 35 | ], 36 | "default-spec-id": 0, 37 | "partition-specs": [ 38 | { 39 | "spec-id": 0, 40 | "fields": [ 41 | { 42 | "name": "x", 43 | "transform": "identity", 44 | "source-id": 1, 45 | "field-id": 1000 46 | } 47 | ] 48 | } 49 | ], 50 | "last-partition-id": 1000, 51 | "default-sort-order-id": 3, 52 | "sort-orders": [ 53 | { 54 | "order-id": 3, 55 | "fields": [ 56 | { 57 | "transform": "identity", 58 | "source-id": 2, 59 | "direction": "asc", 60 | "null-order": "nulls-first" 61 | }, 62 | { 63 | "transform": "bucket[4]", 64 | "source-id": 3, 65 | "direction": "desc", 66 | "null-order": "nulls-last" 67 | } 68 | ] 69 | } 70 | ] 71 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/view_metadata/ViewMetadataUnsupportedVersion.json: -------------------------------------------------------------------------------- 1 | { 2 | "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", 3 | "format-version": 2, 4 | "location": "s3://bucket/warehouse/default.db/event_agg", 5 | "current-version-id": 1, 6 | "properties": { 7 | "comment": "Daily event counts" 8 | 
}, 9 | "versions": [ 10 | { 11 | "version-id": 1, 12 | "timestamp-ms": 1573518431292, 13 | "schema-id": 1, 14 | "default-catalog": "prod", 15 | "default-namespace": [ 16 | "default" 17 | ], 18 | "summary": { 19 | "engine-name": "Spark", 20 | "engineVersion": "3.3.2" 21 | }, 22 | "representations": [ 23 | { 24 | "type": "sql", 25 | "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", 26 | "dialect": "spark" 27 | } 28 | ] 29 | } 30 | ], 31 | "schemas": [ 32 | { 33 | "schema-id": 1, 34 | "type": "struct", 35 | "fields": [ 36 | { 37 | "id": 1, 38 | "name": "event_count", 39 | "required": false, 40 | "type": "int", 41 | "doc": "Count of events" 42 | }, 43 | { 44 | "id": 2, 45 | "name": "event_date", 46 | "required": false, 47 | "type": "date" 48 | } 49 | ] 50 | } 51 | ], 52 | "version-log": [ 53 | { 54 | "timestamp-ms": 1573518431292, 55 | "version-id": 1 56 | } 57 | ] 58 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/view_metadata/ViewMetadataV1CurrentVersionNotFound.json: -------------------------------------------------------------------------------- 1 | { 2 | "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", 3 | "format-version": 1, 4 | "location": "s3://bucket/warehouse/default.db/event_agg", 5 | "current-version-id": 2, 6 | "properties": { 7 | "comment": "Daily event counts" 8 | }, 9 | "versions": [ 10 | { 11 | "version-id": 1, 12 | "timestamp-ms": 1573518431292, 13 | "schema-id": 1, 14 | "default-catalog": "prod", 15 | "default-namespace": [ 16 | "default" 17 | ], 18 | "summary": { 19 | "engine-name": "Spark", 20 | "engineVersion": "3.3.2" 21 | }, 22 | "representations": [ 23 | { 24 | "type": "sql", 25 | "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", 26 | "dialect": "spark" 27 | } 28 | ] 29 | } 30 | ], 31 | "schemas": [ 32 | { 33 | "schema-id": 1, 34 | "type": "struct", 35 | "fields": [ 36 | { 37 | "id": 1, 38 | "name": "event_count", 39 | 
"required": false, 40 | "type": "int", 41 | "doc": "Count of events" 42 | }, 43 | { 44 | "id": 2, 45 | "name": "event_date", 46 | "required": false, 47 | "type": "date" 48 | } 49 | ] 50 | } 51 | ], 52 | "version-log": [ 53 | { 54 | "timestamp-ms": 1573518431292, 55 | "version-id": 1 56 | } 57 | ] 58 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingCurrentVersion.json: -------------------------------------------------------------------------------- 1 | { 2 | "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", 3 | "format-version": 1, 4 | "location": "s3://bucket/warehouse/default.db/event_agg", 5 | "properties": { 6 | "comment": "Daily event counts" 7 | }, 8 | "versions": [ 9 | { 10 | "version-id": 1, 11 | "timestamp-ms": 1573518431292, 12 | "schema-id": 1, 13 | "default-catalog": "prod", 14 | "default-namespace": [ 15 | "default" 16 | ], 17 | "summary": { 18 | "engine-name": "Spark", 19 | "engineVersion": "3.3.2" 20 | }, 21 | "representations": [ 22 | { 23 | "type": "sql", 24 | "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", 25 | "dialect": "spark" 26 | } 27 | ] 28 | } 29 | ], 30 | "schemas": [ 31 | { 32 | "schema-id": 1, 33 | "type": "struct", 34 | "fields": [ 35 | { 36 | "id": 1, 37 | "name": "event_count", 38 | "required": false, 39 | "type": "int", 40 | "doc": "Count of events" 41 | }, 42 | { 43 | "id": 2, 44 | "name": "event_date", 45 | "required": false, 46 | "type": "date" 47 | } 48 | ] 49 | } 50 | ], 51 | "version-log": [ 52 | { 53 | "timestamp-ms": 1573518431292, 54 | "version-id": 1 55 | } 56 | ] 57 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", 3 | "format-version": 1, 4 | 
"location": "s3://bucket/warehouse/default.db/event_agg", 5 | "properties": { 6 | "comment": "Daily event counts" 7 | }, 8 | "versions": [ 9 | { 10 | "version-id": 1, 11 | "timestamp-ms": 1573518431292, 12 | "default-catalog": "prod", 13 | "default-namespace": [ 14 | "default" 15 | ], 16 | "summary": { 17 | "engine-name": "Spark", 18 | "engineVersion": "3.3.2" 19 | }, 20 | "representations": [ 21 | { 22 | "type": "sql", 23 | "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", 24 | "dialect": "spark" 25 | } 26 | ] 27 | } 28 | ], 29 | "schemas": [ 30 | { 31 | "schema-id": 1, 32 | "type": "struct", 33 | "fields": [ 34 | { 35 | "id": 1, 36 | "name": "event_count", 37 | "required": false, 38 | "type": "int", 39 | "doc": "Count of events" 40 | }, 41 | { 42 | "id": 2, 43 | "name": "event_date", 44 | "required": false, 45 | "type": "date" 46 | } 47 | ] 48 | } 49 | ], 50 | "version-log": [ 51 | { 52 | "timestamp-ms": 1573518431292, 53 | "version-id": 1 54 | } 55 | ] 56 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/view_metadata/ViewMetadataV1SchemaNotFound.json: -------------------------------------------------------------------------------- 1 | { 2 | "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", 3 | "format-version": 1, 4 | "location": "s3://bucket/warehouse/default.db/event_agg", 5 | "current-version-id": 1, 6 | "properties": { 7 | "comment": "Daily event counts" 8 | }, 9 | "versions": [ 10 | { 11 | "version-id": 1, 12 | "timestamp-ms": 1573518431292, 13 | "schema-id": 2, 14 | "default-catalog": "prod", 15 | "default-namespace": [ 16 | "default" 17 | ], 18 | "summary": { 19 | "engine-name": "Spark", 20 | "engineVersion": "3.3.2" 21 | }, 22 | "representations": [ 23 | { 24 | "type": "sql", 25 | "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", 26 | "dialect": "spark" 27 | } 28 | ] 29 | } 30 | ], 31 | "schemas": [ 32 | { 33 | "schema-id": 1, 34 | 
"type": "struct", 35 | "fields": [ 36 | { 37 | "id": 1, 38 | "name": "event_count", 39 | "required": false, 40 | "type": "int", 41 | "doc": "Count of events" 42 | }, 43 | { 44 | "id": 2, 45 | "name": "event_date", 46 | "required": false, 47 | "type": "date" 48 | } 49 | ] 50 | } 51 | ], 52 | "version-log": [ 53 | { 54 | "timestamp-ms": 1573518431292, 55 | "version-id": 1 56 | } 57 | ] 58 | } -------------------------------------------------------------------------------- /crates/iceberg/testdata/view_metadata/ViewMetadataV1Valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", 3 | "format-version": 1, 4 | "location": "s3://bucket/warehouse/default.db/event_agg", 5 | "current-version-id": 1, 6 | "properties": { 7 | "comment": "Daily event counts" 8 | }, 9 | "versions": [ 10 | { 11 | "version-id": 1, 12 | "timestamp-ms": 1573518431292, 13 | "schema-id": 1, 14 | "default-catalog": "prod", 15 | "default-namespace": [ 16 | "default" 17 | ], 18 | "summary": { 19 | "engine-name": "Spark", 20 | "engineVersion": "3.3.2" 21 | }, 22 | "representations": [ 23 | { 24 | "type": "sql", 25 | "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", 26 | "dialect": "spark" 27 | } 28 | ] 29 | } 30 | ], 31 | "schemas": [ 32 | { 33 | "schema-id": 1, 34 | "type": "struct", 35 | "fields": [ 36 | { 37 | "id": 1, 38 | "name": "event_count", 39 | "required": false, 40 | "type": "int", 41 | "doc": "Count of events" 42 | }, 43 | { 44 | "id": 2, 45 | "name": "event_date", 46 | "required": false, 47 | "type": "date" 48 | } 49 | ] 50 | } 51 | ], 52 | "version-log": [ 53 | { 54 | "timestamp-ms": 1573518431292, 55 | "version-id": 1 56 | } 57 | ] 58 | } -------------------------------------------------------------------------------- /crates/integration_tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache 
Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | license = { workspace = true } 22 | name = "iceberg-integration-tests" 23 | repository = { workspace = true } 24 | rust-version = { workspace = true } 25 | version = { workspace = true } 26 | 27 | [dependencies] 28 | arrow-array = { workspace = true } 29 | arrow-schema = { workspace = true } 30 | ctor = { workspace = true } 31 | datafusion = { workspace = true } 32 | futures = { workspace = true } 33 | iceberg = { workspace = true } 34 | iceberg-catalog-rest = { workspace = true } 35 | iceberg-datafusion = { workspace = true } 36 | iceberg_test_utils = { path = "../test_utils", features = ["tests"] } 37 | parquet = { workspace = true } 38 | tokio = { workspace = true } 39 | uuid = { workspace = true } 40 | ordered-float = "2.10.1" 41 | -------------------------------------------------------------------------------- /crates/integration_tests/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::collections::HashMap; 19 | 20 | use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; 21 | use iceberg_catalog_rest::RestCatalogConfig; 22 | use iceberg_test_utils::docker::DockerCompose; 23 | use iceberg_test_utils::{normalize_test_name, set_up}; 24 | 25 | const REST_CATALOG_PORT: u16 = 8181; 26 | 27 | pub struct TestFixture { 28 | pub _docker_compose: DockerCompose, 29 | pub catalog_config: RestCatalogConfig, 30 | } 31 | 32 | pub fn set_test_fixture(func: &str) -> TestFixture { 33 | set_up(); 34 | let docker_compose = DockerCompose::new( 35 | normalize_test_name(format!("{}_{func}", module_path!())), 36 | format!("{}/testdata", env!("CARGO_MANIFEST_DIR")), 37 | ); 38 | 39 | // Stop any containers from previous runs and start new ones 40 | docker_compose.down(); 41 | docker_compose.up(); 42 | 43 | let rest_catalog_ip = docker_compose.get_container_ip("rest"); 44 | let minio_ip = docker_compose.get_container_ip("minio"); 45 | 46 | let catalog_config = RestCatalogConfig::builder() 47 | .uri(format!("http://{}:{}", rest_catalog_ip, REST_CATALOG_PORT)) 48 | .props(HashMap::from([ 49 | ( 50 | S3_ENDPOINT.to_string(), 51 | format!("http://{}:{}", minio_ip, 9000), 52 | ), 53 | 
(S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), 54 | (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), 55 | (S3_REGION.to_string(), "us-east-1".to_string()), 56 | ])) 57 | .build(); 58 | 59 | TestFixture { 60 | _docker_compose: docker_compose, 61 | catalog_config, 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /crates/integration_tests/testdata/spark/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | FROM python:3.9-bullseye 17 | 18 | RUN apt-get -qq update && \ 19 | apt-get -qq install -y --no-install-recommends sudo curl openjdk-11-jdk && \ 20 | apt-get -qq clean && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | ENV SPARK_HOME=${SPARK_HOME:-"/opt/spark"} 24 | ENV HADOOP_HOME=${HADOOP_HOME:-"/opt/hadoop"} 25 | ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH 26 | 27 | RUN mkdir -p ${HADOOP_HOME} && mkdir -p ${SPARK_HOME} && mkdir -p /home/iceberg/spark-events 28 | WORKDIR ${SPARK_HOME} 29 | 30 | ENV SPARK_VERSION=3.5.6 31 | ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12 32 | ENV ICEBERG_VERSION=1.6.0 33 | 34 | RUN curl --retry 5 -s -C - https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \ 35 | && tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \ 36 | && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz 37 | 38 | # Download iceberg spark runtime 39 | RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar -Lo iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \ 40 | && mv iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar /opt/spark/jars 41 | 42 | # Download AWS bundle 43 | RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar 44 | 45 | COPY spark-defaults.conf /opt/spark/conf 46 | ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}" 47 | 48 | RUN chmod u+x /opt/spark/sbin/* && \ 49 | chmod u+x /opt/spark/bin/* 50 | 51 | WORKDIR '/home/' 52 | 53 | COPY entrypoint.sh . 54 | COPY provision.py . 
55 | 56 | HEALTHCHECK --retries=120 --interval=1s \ 57 | CMD ls /tmp/ready || exit 1 58 | 59 | ENTRYPOINT ["./entrypoint.sh"] 60 | -------------------------------------------------------------------------------- /crates/integration_tests/testdata/spark/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | set -e 22 | 23 | start-master.sh -p 7077 24 | start-worker.sh spark://spark-iceberg:7077 25 | start-history-server.sh 26 | 27 | python3 ./provision.py 28 | 29 | touch /tmp/ready 30 | 31 | tail -f /dev/null 32 | -------------------------------------------------------------------------------- /crates/integration_tests/testdata/spark/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 
5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions 19 | spark.sql.catalog.rest org.apache.iceberg.spark.SparkCatalog 20 | spark.sql.catalog.rest.type rest 21 | spark.sql.catalog.rest.uri http://rest:8181 22 | spark.sql.catalog.rest.io-impl org.apache.iceberg.aws.s3.S3FileIO 23 | spark.sql.catalog.rest.warehouse s3://warehouse/rest/ 24 | spark.sql.catalog.rest.s3.endpoint http://minio:9000 25 | spark.sql.defaultCatalog rest 26 | spark.eventLog.enabled true 27 | spark.eventLog.dir /home/iceberg/spark-events 28 | spark.history.fs.logDirectory /home/iceberg/spark-events 29 | spark.sql.catalogImplementation in-memory 30 | -------------------------------------------------------------------------------- /crates/integration_tests/tests/shared.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 |
use std::sync::{Arc, OnceLock};

use ctor::dtor;
use iceberg_integration_tests::{TestFixture, set_test_fixture};

pub mod shared_tests;

/// Fixture shared by every test in this binary; populated on first use.
static DOCKER_CONTAINERS: OnceLock<Arc<TestFixture>> = OnceLock::new();

/// Returns the shared [`TestFixture`], creating it on the first call.
///
/// The first caller runs `set_test_fixture("shared_tests")`; later callers
/// get the same instance back, because `OnceLock::get_or_init` runs its
/// initialiser at most once.
pub fn get_shared_containers() -> &'static Arc<TestFixture> {
    DOCKER_CONTAINERS.get_or_init(|| Arc::new(set_test_fixture("shared_tests")))
}

/// Brings the docker-compose environment down when the test binary exits.
///
/// Registered through `ctor`'s `#[dtor]` attribute. Does nothing if no test
/// ever initialised the fixture.
#[dtor]
fn shutdown() {
    if let Some(fixture) = DOCKER_CONTAINERS.get() {
        fixture._docker_compose.down()
    }
}
-------------------------------------------------------------------------------- /crates/integration_tests/tests/shared_tests/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied.
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 |
use std::collections::HashMap;

use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
use iceberg::{Catalog, Namespace, NamespaceIdent};
use iceberg_catalog_rest::RestCatalog;

use crate::get_shared_containers;

mod append_data_file_test;
mod append_partition_data_file_test;
mod conflict_commit_test;
mod datafusion;
mod read_evolved_schema;
mod read_positional_deletes;
mod scan_all_type;

/// Creates a namespace with a fresh UUID v4 name in the shared REST catalog
/// and returns it.
///
/// The namespace carries the properties `owner = ray` and
/// `community = apache`. Panics if the identifier cannot be built or the
/// catalog rejects the creation.
pub async fn random_ns() -> Namespace {
    let catalog = RestCatalog::new(get_shared_containers().catalog_config.clone());

    let ident = NamespaceIdent::from_strs([uuid::Uuid::new_v4().to_string()]).unwrap();
    let mut properties = HashMap::new();
    properties.insert("owner".to_string(), "ray".to_string());
    properties.insert("community".to_string(), "apache".to_string());
    let namespace = Namespace::with_properties(ident, properties);

    catalog
        .create_namespace(namespace.name(), namespace.properties().clone())
        .await
        .unwrap();

    namespace
}

/// Builds the three-column schema used by the shared tests: optional string
/// `foo` (id 1), required int `bar` (id 2, the identifier field) and optional
/// boolean `baz` (id 3).
fn test_schema() -> Schema {
    let columns = vec![
        NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
        NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
        NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
    ];

    Schema::builder()
        .with_schema_id(1)
        .with_identifier_field_ids(vec![2])
        .with_fields(columns)
        .build()
        .unwrap()
}
-------------------------------------------------------------------------------- /crates/integration_tests/tests/shared_tests/read_positional_deletes.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements.
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 |
//! Integration tests for rest catalog.

use futures::TryStreamExt;
use iceberg::ErrorKind::FeatureUnsupported;
use iceberg::{Catalog, TableIdent};
use iceberg_catalog_rest::RestCatalog;

use crate::get_shared_containers;

/// Checks how a table with positional delete files is planned and read.
///
/// Planning with delete-file processing enabled must attach the delete files
/// to the first file scan task. Reading the table must currently fail with
/// `FeatureUnsupported`.
#[tokio::test]
async fn test_read_table_with_positional_deletes() {
    let fixture = get_shared_containers();
    let rest_catalog = RestCatalog::new(fixture.catalog_config.clone());

    let table = rest_catalog
        .load_table(
            &TableIdent::from_strs(["default", "test_positional_merge_on_read_double_deletes"])
                .unwrap(),
        )
        .await
        .unwrap();

    let scan = table
        .scan()
        .with_delete_file_processing_enabled(true)
        .build()
        .unwrap();
    println!("{:?}", scan);

    let plan: Vec<_> = scan
        .plan_files()
        .await
        .unwrap()
        .try_collect()
        .await
        .unwrap();
    println!("{:?}", plan);

    // Fail with a readable message instead of an index-out-of-bounds panic
    // if planning produced no tasks at all.
    assert!(
        !plan.is_empty(),
        "expected at least one file scan task in the plan"
    );

    // Scan plan phase should include delete files in file plan
    // when with_delete_file_processing_enabled == true
    assert_eq!(plan[0].deletes.len(), 2);

    // 😱 If we don't support positional deletes, we should fail when we try to read a table that
    // has positional deletes. The table has 12 rows, and 2 are deleted, see provision.py
    let result = scan.to_arrow().await.unwrap().try_collect::<Vec<_>>().await;

    assert!(result.is_err_and(|e| e.kind() == FeatureUnsupported));

    // When we get support for it:
    // let batch_stream = scan.to_arrow().await.unwrap();
    // let batches: Vec<_> = batch_stream.try_collect().await.unwrap();
    // let num_rows: usize = batches.iter().map(|v| v.num_rows()).sum();
    // assert_eq!(num_rows, 10);
}
-------------------------------------------------------------------------------- /crates/integrations/cache-moka/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License.
17 | 18 | [package] 19 | name = "iceberg-cache-moka" 20 | 21 | edition = { workspace = true } 22 | homepage = { workspace = true } 23 | license = { workspace = true } 24 | repository = { workspace = true } 25 | rust-version = { workspace = true } 26 | version = { workspace = true } 27 | 28 | [dependencies] 29 | iceberg = { workspace = true } 30 | moka = { version = "0.12.10", features = ["sync"] } 31 | -------------------------------------------------------------------------------- /crates/integrations/cache-moka/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Apache Iceberg Rust Cache Moka 21 | 22 | This crate provides a [moka](https://github.com/moka-rs/moka) cache implementation for Apache Iceberg Rust. It is used to cache data in memory for faster access. 23 | -------------------------------------------------------------------------------- /crates/integrations/cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | description = "Apache iceberg client" 20 | edition.workspace = true 21 | homepage.workspace = true 22 | license.workspace = true 23 | name = "iceberg-cli" 24 | readme = "README.md" 25 | repository.workspace = true 26 | rust-version.workspace = true 27 | version.workspace = true 28 | 29 | [dependencies] 30 | anyhow = { workspace = true } 31 | clap = { workspace = true } 32 | datafusion = { workspace = true } 33 | datafusion-cli = { workspace = true } 34 | dirs = { workspace = true } 35 | fs-err = { workspace = true } 36 | iceberg-catalog-rest = { workspace = true } 37 | iceberg-datafusion = { workspace = true } 38 | mimalloc = { workspace = true } 39 | stacker = { workspace = true } 40 | tokio = { workspace = true } 41 | toml = { workspace = true } 42 | tracing = { workspace = true } 43 | tracing-subscriber = { workspace = true } 44 | 45 | [package.metadata.cargo-machete] 46 | # These dependencies are added to ensure minimal dependency version 47 | ignored = ["stacker", "mimalloc"] 48 | -------------------------------------------------------------------------------- /crates/integrations/cli/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | # Introduction 22 | 23 | Iceberg CLI (`iceberg-cli`) is a small command-line utility that runs SQL queries against Iceberg tables. 24 | It is backed by the DataFusion engine. 25 | 26 | -------------------------------------------------------------------------------- /crates/integrations/cli/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership.
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | #![doc = include_str!("../README.md")] 19 | pub const ICEBERG_CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); 20 | 21 | mod catalog; 22 | pub use catalog::*; 23 | -------------------------------------------------------------------------------- /crates/integrations/datafusion/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg-datafusion" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | categories = ["database"] 26 | description = "Apache Iceberg DataFusion Integration" 27 | keywords = ["iceberg", "integrations", "datafusion"] 28 | license = { workspace = true } 29 | repository = { workspace = true } 30 | 31 | [dependencies] 32 | anyhow = { workspace = true } 33 | async-trait = { workspace = true } 34 | datafusion = { workspace = true } 35 | futures = { workspace = true } 36 | iceberg = { workspace = true } 37 | tokio = { workspace = true } 38 | 39 | [dev-dependencies] 40 | iceberg-catalog-memory = { workspace = true } 41 | parquet = { workspace = true } 42 | tempfile = { workspace = true } 43 | -------------------------------------------------------------------------------- /crates/integrations/datafusion/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Apache Iceberg DataFusion Integration 21 | 22 | This crate contains the integration of Apache DataFusion and Apache Iceberg. 23 | -------------------------------------------------------------------------------- /crates/integrations/datafusion/src/error.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use anyhow::anyhow; 19 | use iceberg::{Error, ErrorKind}; 20 | 21 | /// Converts a datafusion error into an iceberg error. 22 | pub fn from_datafusion_error(error: datafusion::error::DataFusionError) -> Error { 23 | Error::new( 24 | ErrorKind::Unexpected, 25 | "Operation failed for hitting datafusion error".to_string(), 26 | ) 27 | .with_source(anyhow!("datafusion error: {:?}", error)) 28 | } 29 | /// Converts an iceberg error into a datafusion error. 30 | pub fn to_datafusion_error(error: Error) -> datafusion::error::DataFusionError { 31 | datafusion::error::DataFusionError::External(error.into()) 32 | } 33 | -------------------------------------------------------------------------------- /crates/integrations/datafusion/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | mod catalog; 19 | pub use catalog::*; 20 | 21 | mod error; 22 | pub use error::*; 23 | 24 | mod physical_plan; 25 | mod schema; 26 | pub mod table; 27 | pub use table::table_provider_factory::IcebergTableProviderFactory; 28 | pub use table::*; 29 | -------------------------------------------------------------------------------- /crates/integrations/datafusion/src/physical_plan/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | pub(crate) mod expr_to_predicate; 19 | pub(crate) mod scan; 20 | -------------------------------------------------------------------------------- /crates/sqllogictest/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | license = { workspace = true } 22 | name = "iceberg-sqllogictest" 23 | repository = { workspace = true } 24 | rust-version = { workspace = true } 25 | version = { workspace = true } 26 | 27 | [dependencies] 28 | anyhow = { workspace = true } 29 | async-trait = { workspace = true } 30 | datafusion = { workspace = true } 31 | datafusion-sqllogictest = { workspace = true } 32 | enum-ordinalize = { workspace = true } 33 | indicatif = { workspace = true } 34 | sqllogictest = { workspace = true } 35 | toml = { workspace = true } 36 | 37 | [package.metadata.cargo-machete] 38 | # These dependencies are added to ensure minimal dependency version 39 | ignored = ["enum-ordinalize"] 40 | -------------------------------------------------------------------------------- /crates/sqllogictest/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | This crate contains a suite of [sqllogictest](https://crates.io/crates/sqllogictest) tests that are used to validate [iceberg-rust](https://github.com/apache/iceberg-rust). 21 | 22 | ## Running the tests 23 | 24 | Just run the following command: 25 | 26 | ```bash 27 | cargo test 28 | ``` 29 | 30 | ## SQL Engines 31 | 32 | The tests are run against the following SQL engines: 33 | 34 | * [Apache DataFusion](https://crates.io/crates/datafusion) 35 | * [Apache Spark](https://github.com/apache/spark) -------------------------------------------------------------------------------- /crates/sqllogictest/src/engine/datafusion.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership.
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::path::{Path, PathBuf}; 19 | use std::sync::Arc; 20 | 21 | use anyhow::{Context, anyhow}; 22 | use datafusion::catalog::CatalogProvider; 23 | use datafusion::prelude::{SessionConfig, SessionContext}; 24 | use datafusion_sqllogictest::DataFusion; 25 | use indicatif::ProgressBar; 26 | use sqllogictest::runner::AsyncDB; 27 | use toml::Table as TomlTable; 28 | 29 | use crate::engine::Engine; 30 | use crate::error::Result; 31 | 32 | pub struct DataFusionEngine { 33 | datafusion: DataFusion, 34 | } 35 | 36 | #[async_trait::async_trait] 37 | impl Engine for DataFusionEngine { 38 | async fn new(config: TomlTable) -> Result { 39 | let session_config = SessionConfig::new().with_target_partitions(4); 40 | let ctx = SessionContext::new_with_config(session_config); 41 | ctx.register_catalog("default", Self::create_catalog(&config).await?); 42 | 43 | Ok(Self { 44 | datafusion: DataFusion::new(ctx, PathBuf::from("testdata"), ProgressBar::new(100)), 45 | }) 46 | } 47 | 48 | async fn run_slt_file(&mut self, path: &Path) -> Result<()> { 49 | let content = std::fs::read_to_string(path) 50 | .with_context(|| format!("Failed to read slt file {:?}", path)) 51 | .map_err(|e| anyhow!(e))?; 52 | 53 | self.datafusion 54 | .run(content.as_str()) 55 | .await 56 | .with_context(|| format!("Failed to run slt file {:?}", path)) 57 | .map_err(|e| 
anyhow!(e))?; 58 | 59 | Ok(()) 60 | } 61 | } 62 | 63 | impl DataFusionEngine { 64 | async fn create_catalog(_: &TomlTable) -> anyhow::Result> { 65 | todo!() 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /crates/sqllogictest/src/engine/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | mod datafusion; 19 | 20 | use std::path::Path; 21 | 22 | use toml::Table as TomlTable; 23 | 24 | use crate::error::Result; 25 | 26 | #[async_trait::async_trait] 27 | pub trait Engine: Sized { 28 | async fn new(config: TomlTable) -> Result; 29 | async fn run_slt_file(&mut self, path: &Path) -> Result<()>; 30 | } 31 | -------------------------------------------------------------------------------- /crates/sqllogictest/src/error.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::fmt::{Debug, Display, Formatter}; 19 | 20 | pub struct Error(pub anyhow::Error); 21 | 22 | pub type Result = std::result::Result; 23 | 24 | impl Debug for Error { 25 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 26 | write!(f, "{:?}", self.0) 27 | } 28 | } 29 | 30 | impl Display for Error { 31 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 32 | write!(f, "{}", self.0) 33 | } 34 | } 35 | 36 | impl std::error::Error for Error { 37 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 38 | self.0.source() 39 | } 40 | } 41 | 42 | impl From for Error { 43 | fn from(value: anyhow::Error) -> Self { 44 | Self(value) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /crates/sqllogictest/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | // This lib contains code copied from 19 | // [Apache DataFusion](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest) 20 | 21 | #[allow(dead_code)] 22 | mod engine; 23 | #[allow(dead_code)] 24 | mod error; 25 | -------------------------------------------------------------------------------- /crates/test_utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License.
17 | 18 | [package] 19 | edition = { workspace = true } 20 | homepage = { workspace = true } 21 | name = "iceberg_test_utils" 22 | rust-version = { workspace = true } 23 | version = { workspace = true } 24 | 25 | license = { workspace = true } 26 | repository = { workspace = true } 27 | 28 | [dependencies] 29 | tracing = { workspace = true } 30 | tracing-subscriber = { workspace = true } 31 | 32 | [features] 33 | tests = [] 34 | -------------------------------------------------------------------------------- /crates/test_utils/DEPENDENCIES.rust.tsv: -------------------------------------------------------------------------------- 1 | crate Apache-2.0 MIT Unicode-3.0 2 | cfg-if@1.0.0 X X 3 | iceberg_test_utils@0.5.1 X 4 | lazy_static@1.5.0 X X 5 | log@0.4.25 X X 6 | nu-ansi-term@0.46.0 X 7 | once_cell@1.21.1 X X 8 | overload@0.1.1 X 9 | pin-project-lite@0.2.16 X X 10 | proc-macro2@1.0.93 X X 11 | quote@1.0.40 X X 12 | sharded-slab@0.1.7 X 13 | smallvec@1.14.0 X X 14 | syn@2.0.101 X X 15 | thread_local@1.1.8 X X 16 | tracing@0.1.41 X 17 | tracing-attributes@0.1.28 X 18 | tracing-core@0.1.33 X 19 | tracing-log@0.2.0 X 20 | tracing-subscriber@0.3.19 X 21 | unicode-ident@1.0.16 X X X 22 | winapi@0.3.9 X X 23 | winapi-i686-pc-windows-gnu@0.4.0 X X 24 | winapi-x86_64-pc-windows-gnu@0.4.0 X X 25 | -------------------------------------------------------------------------------- /crates/test_utils/src/cmd.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use std::process::Command; 19 | 20 | use tracing::info; 21 | 22 | pub fn run_command(mut cmd: Command, desc: impl ToString) { 23 | let desc = desc.to_string(); 24 | info!("Starting to {}, command: {:?}", &desc, cmd); 25 | let exit = cmd.status().unwrap(); 26 | if exit.success() { 27 | info!("{} succeed!", desc) 28 | } else { 29 | panic!("{} failed: {:?}", desc, exit); 30 | } 31 | } 32 | 33 | pub fn get_cmd_output_result(mut cmd: Command, desc: impl ToString) -> Result { 34 | let desc = desc.to_string(); 35 | info!("Starting to {}, command: {:?}", &desc, cmd); 36 | let result = cmd.output(); 37 | match result { 38 | Ok(output) => { 39 | if output.status.success() { 40 | info!("{} succeed!", desc); 41 | Ok(String::from_utf8(output.stdout).unwrap()) 42 | } else { 43 | Err(format!("{} failed with rc: {:?}", desc, output.status)) 44 | } 45 | } 46 | Err(err) => Err(format!("{} failed with error: {}", desc, { err })), 47 | } 48 | } 49 | 50 | pub fn get_cmd_output(cmd: Command, desc: impl ToString) -> String { 51 | let result = get_cmd_output_result(cmd, desc); 52 | match result { 53 | Ok(output_str) => output_str, 54 | Err(err) => panic!("{}", err), 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /crates/test_utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! This crate contains common utilities for testing. 19 | //! 20 | //! It's not intended for use outside of `iceberg-rust`. 21 | 22 | #[cfg(feature = "tests")] 23 | mod cmd; 24 | #[cfg(feature = "tests")] 25 | pub mod docker; 26 | 27 | #[cfg(feature = "tests")] 28 | pub use common::*; 29 | 30 | #[cfg(feature = "tests")] 31 | mod common { 32 | use std::sync::Once; 33 | 34 | static INIT: Once = Once::new(); 35 | pub fn set_up() { 36 | INIT.call_once(tracing_subscriber::fmt::init); 37 | } 38 | pub fn normalize_test_name(s: impl ToString) -> String { 39 | s.to_string().replace("::", "__").replace('.', "_") 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [licenses] 19 | allow = [ 20 | "Apache-2.0", 21 | "Apache-2.0 WITH LLVM-exception", 22 | "MIT", 23 | "BSD-2-Clause", 24 | "BSD-3-Clause", 25 | "ISC", 26 | "CC0-1.0", 27 | "Zlib", 28 | "CDLA-Permissive-2.0", 29 | # Category-A: https://issues.apache.org/jira/browse/LEGAL-660 30 | "Unicode-3.0", 31 | # Boost Software License Version 1.0 is allowed (Category-A): 32 | # https://www.apache.org/legal/resolved.html#category-a 33 | "BSL-1.0" 34 | ] 35 | 36 | exceptions = [ 37 | # The MPL license is allowed (binary-only): 38 | # https://www.apache.org/legal/resolved.html#category-b 39 | { allow = ["MPL-2.0"], crate = "webpki-roots" }, 40 | { allow = ["MPL-2.0"], crate = "generational-arena" }, 41 | { allow = ["MPL-2.0"], crate = "option-ext" }, 42 | ] -------------------------------------------------------------------------------- /docs/contributing/orbstack.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # OrbStack as a docker alternative on macOS 21 | 1. Install OrbStack by downloading [installer](https://orbstack.dev/download) or using Homebrew. 22 | ```shell 23 | brew install orbstack 24 | ``` 25 | 26 | 2. Migrate Docker data 27 | ```shell 28 | orbstack migrate docker 29 | ``` 30 | 31 | 3. (Optional) Add registry mirrors 32 | 33 | You can edit the config directly at `~/.orbstack/config/docker.json` and restart the engine with `orb restart docker`. 
34 | 35 | ``` 36 | { 37 | "registry-mirrors": [""] 38 | } 39 | ``` -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # iceberg-rust uses an unstable (nightly) Rust toolchain to run linters such as `clippy` and `rustfmt`. This does not affect downstream users, 19 | # for whom only the MSRV is required. 20 | # 21 | # The channel date is exactly the same day as our MSRV. 22 | [toolchain] 23 | channel = "nightly-2025-02-20" 24 | components = ["rustfmt", "clippy"] 25 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership.
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | edition = "2021" 19 | reorder_imports = true 20 | 21 | format_code_in_doc_comments = true 22 | group_imports = "StdExternalCrate" 23 | imports_granularity = "Module" 24 | overflow_delimited_expr = true 25 | trailing_comma = "Vertical" 26 | where_single_line = true 27 | -------------------------------------------------------------------------------- /scripts/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
# Builds the source release artifacts for the version in ICEBERG_VERSION:
# checks out the release branch, runs the license checks, archives the source
# into dist/, then signs the tarball and writes/verifies its sha512 checksum.
# ICEBERG_VERSION_RC selects the release candidate suffix (default: rc.1).

set -e

if [ -z "${ICEBERG_VERSION}" ]; then
    # Diagnostics go to stderr so they are not mixed into captured output.
    echo "ICEBERG_VERSION is unset" >&2
    exit 1
else
    echo "var is set to '$ICEBERG_VERSION'"
fi

# tar source code
release_version=${ICEBERG_VERSION}
# rc versions
rc_version="${ICEBERG_VERSION_RC:-rc.1}"
# Corresponding git repository branch
git_branch=release-${release_version}-${rc_version}

rm -rf dist
mkdir -p dist/

echo "> Checkout version branch"
git checkout -B "${git_branch}"

# Run a few checks
echo "> Check license"
# Quote the volume argument so a working directory containing spaces is not
# split into several words.
docker run -it --rm -v "$(pwd):/github/workspace" apache/skywalking-eyes header check

echo "> Run dependency license check using cargo-deny"
python3 ./scripts/dependencies.py check

# Generate and verify artifacts
echo "> Start package"
git archive --format=tar.gz --output="dist/apache-iceberg-rust-$release_version-src.tar.gz" --prefix="apache-iceberg-rust-$release_version-src/" --add-file=Cargo.toml "$git_branch"

cd dist

echo "> Generate signature"
for i in *.tar.gz; do
    echo "$i"
    gpg --armor --output "$i.asc" --detach-sig "$i"
done
echo "> Check signature"
for i in *.tar.gz; do
    echo "$i"
    gpg --verify "$i.asc" "$i"
done
echo "> Generate sha512sum"
for i in *.tar.gz; do
    echo "$i"
    sha512sum "$i" >"$i.sha512"
done
echo "> Check sha512sum"
for i in *.tar.gz; do
    echo "$i"
    sha512sum --check "$i.sha512"
done
--------------------------------------------------------------------------------
/scripts/verify.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Checks that a Rust toolchain is available and then builds the workspace in
# release mode.

import os
import subprocess
import sys

# Directory the script was started from.
BASE_DIR = os.getcwd()


def check_rust() -> bool:
    """Return whether ``cargo`` can be run from the current ``PATH``.

    Runs ``cargo --version``.

    Returns:
        True when the command ran and exited successfully, False when no
        ``cargo`` executable was found.

    Raises:
        RuntimeError: For any other failure (for example a non-zero exit
            status); the original exception is attached as the cause.
    """
    try:
        subprocess.run(["cargo", "--version"], check=True)
    except FileNotFoundError:
        return False
    except Exception as e:
        # Chain explicitly and format the message as text; passing ``e`` as a
        # second positional argument made the message render as a tuple.
        raise RuntimeError(f"Check rust met unexpected error: {e}") from e
    return True


def build_core() -> None:
    """Build the workspace with ``cargo build --release``.

    Raises:
        subprocess.CalledProcessError: If the build exits with a non-zero
            status.
    """
    print("Start building iceberg rust")

    subprocess.run(["cargo", "build", "--release"], check=True)


def main() -> None:
    """Build the project, exiting with status 1 when cargo is missing."""
    if not check_rust():
        print(
            "Cargo is not found, please check if rust development has been setup correctly"
        )
        print("Visit https://www.rust-lang.org/tools/install for more information")
        sys.exit(1)

    build_core()


if __name__ == "__main__":
    main()