├── .asf.yaml
├── .cargo
└── audit.toml
├── .devcontainer
└── devcontainer.json
├── .gitattributes
├── .github
├── ISSUE_TEMPLATE
│ ├── config.yml
│ ├── iceberg_bug_report.yml
│ ├── iceberg_epic.yml
│ └── iceberg_feature.yml
├── PULL_REQUEST_TEMPLATE.md
├── actions
│ ├── overwrite-package-version
│ │ └── action.yml
│ └── setup-builder
│ │ └── action.yml
├── dependabot.yml
└── workflows
│ ├── audit.yml
│ ├── bindings_python_ci.yml
│ ├── ci.yml
│ ├── ci_typos.yml
│ ├── publish.yml
│ ├── release_python.yml
│ ├── release_python_nightly.yml
│ └── website.yml
├── .gitignore
├── .idea
├── .gitignore
└── vcs.xml
├── .licenserc.yaml
├── .taplo.toml
├── .typos.toml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── Makefile
├── NOTICE
├── README.md
├── bindings
└── python
│ ├── Cargo.lock
│ ├── Cargo.toml
│ ├── DEPENDENCIES.rust.tsv
│ ├── README.md
│ ├── project-description.md
│ ├── pyproject.toml
│ ├── python
│ └── pyiceberg_core
│ │ └── __init__.py
│ ├── src
│ ├── datafusion_table_provider.rs
│ ├── error.rs
│ ├── lib.rs
│ ├── runtime.rs
│ └── transform.rs
│ └── tests
│ ├── test_datafusion_table_provider.py
│ └── test_transform.py
├── crates
├── catalog
│ ├── glue
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ ├── README.md
│ │ ├── src
│ │ │ ├── catalog.rs
│ │ │ ├── error.rs
│ │ │ ├── lib.rs
│ │ │ ├── schema.rs
│ │ │ └── utils.rs
│ │ ├── testdata
│ │ │ └── glue_catalog
│ │ │ │ └── docker-compose.yaml
│ │ └── tests
│ │ │ └── glue_catalog_test.rs
│ ├── hms
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ ├── README.md
│ │ ├── src
│ │ │ ├── catalog.rs
│ │ │ ├── error.rs
│ │ │ ├── lib.rs
│ │ │ ├── schema.rs
│ │ │ └── utils.rs
│ │ ├── testdata
│ │ │ └── hms_catalog
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── core-site.xml
│ │ │ │ └── docker-compose.yaml
│ │ └── tests
│ │ │ └── hms_catalog_test.rs
│ ├── memory
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ ├── README.md
│ │ └── src
│ │ │ ├── catalog.rs
│ │ │ ├── lib.rs
│ │ │ └── namespace_state.rs
│ ├── rest
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ ├── README.md
│ │ ├── src
│ │ │ ├── catalog.rs
│ │ │ ├── client.rs
│ │ │ ├── lib.rs
│ │ │ └── types.rs
│ │ ├── testdata
│ │ │ ├── create_table_response.json
│ │ │ ├── load_table_response.json
│ │ │ ├── rest_catalog
│ │ │ │ └── docker-compose.yaml
│ │ │ └── update_table_response.json
│ │ └── tests
│ │ │ └── rest_catalog_test.rs
│ ├── s3tables
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ └── src
│ │ │ ├── catalog.rs
│ │ │ ├── lib.rs
│ │ │ └── utils.rs
│ └── sql
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ └── src
│ │ ├── catalog.rs
│ │ ├── error.rs
│ │ └── lib.rs
├── examples
│ ├── Cargo.toml
│ ├── DEPENDENCIES.rust.tsv
│ ├── README.md
│ └── src
│ │ ├── oss_backend.rs
│ │ ├── rest_catalog_namespace.rs
│ │ └── rest_catalog_table.rs
├── iceberg
│ ├── Cargo.toml
│ ├── DEPENDENCIES.rust.tsv
│ ├── README.md
│ ├── src
│ │ ├── arrow
│ │ │ ├── delete_file_manager.rs
│ │ │ ├── mod.rs
│ │ │ ├── nan_val_cnt_visitor.rs
│ │ │ ├── reader.rs
│ │ │ ├── record_batch_projector.rs
│ │ │ ├── record_batch_transformer.rs
│ │ │ ├── schema.rs
│ │ │ └── value.rs
│ │ ├── avro
│ │ │ ├── mod.rs
│ │ │ └── schema.rs
│ │ ├── cache.rs
│ │ ├── catalog
│ │ │ └── mod.rs
│ │ ├── delete_file_index.rs
│ │ ├── delete_vector.rs
│ │ ├── error.rs
│ │ ├── expr
│ │ │ ├── accessor.rs
│ │ │ ├── mod.rs
│ │ │ ├── predicate.rs
│ │ │ ├── term.rs
│ │ │ └── visitors
│ │ │ │ ├── bound_predicate_visitor.rs
│ │ │ │ ├── expression_evaluator.rs
│ │ │ │ ├── inclusive_metrics_evaluator.rs
│ │ │ │ ├── inclusive_projection.rs
│ │ │ │ ├── manifest_evaluator.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── page_index_evaluator.rs
│ │ │ │ ├── row_group_metrics_evaluator.rs
│ │ │ │ ├── strict_metrics_evaluator.rs
│ │ │ │ └── strict_projection.rs
│ │ ├── inspect
│ │ │ ├── manifests.rs
│ │ │ ├── metadata_table.rs
│ │ │ ├── mod.rs
│ │ │ └── snapshots.rs
│ │ ├── io
│ │ │ ├── file_io.rs
│ │ │ ├── mod.rs
│ │ │ ├── object_cache.rs
│ │ │ ├── storage.rs
│ │ │ ├── storage_azdls.rs
│ │ │ ├── storage_fs.rs
│ │ │ ├── storage_gcs.rs
│ │ │ ├── storage_memory.rs
│ │ │ ├── storage_oss.rs
│ │ │ └── storage_s3.rs
│ │ ├── lib.rs
│ │ ├── puffin
│ │ │ ├── blob.rs
│ │ │ ├── compression.rs
│ │ │ ├── metadata.rs
│ │ │ ├── mod.rs
│ │ │ ├── reader.rs
│ │ │ ├── test_utils.rs
│ │ │ └── writer.rs
│ │ ├── runtime
│ │ │ └── mod.rs
│ │ ├── scan
│ │ │ ├── cache.rs
│ │ │ ├── context.rs
│ │ │ ├── mod.rs
│ │ │ └── task.rs
│ │ ├── spec
│ │ │ ├── datatypes.rs
│ │ │ ├── encrypted_key.rs
│ │ │ ├── manifest
│ │ │ │ ├── _serde.rs
│ │ │ │ ├── data_file.rs
│ │ │ │ ├── entry.rs
│ │ │ │ ├── metadata.rs
│ │ │ │ ├── mod.rs
│ │ │ │ └── writer.rs
│ │ │ ├── manifest_list.rs
│ │ │ ├── mod.rs
│ │ │ ├── name_mapping
│ │ │ │ └── mod.rs
│ │ │ ├── partition.rs
│ │ │ ├── schema
│ │ │ │ ├── _serde.rs
│ │ │ │ ├── id_reassigner.rs
│ │ │ │ ├── index.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── prune_columns.rs
│ │ │ │ ├── utils.rs
│ │ │ │ └── visitor.rs
│ │ │ ├── snapshot.rs
│ │ │ ├── snapshot_summary.rs
│ │ │ ├── sort.rs
│ │ │ ├── statistic_file.rs
│ │ │ ├── table_metadata.rs
│ │ │ ├── table_metadata_builder.rs
│ │ │ ├── transform.rs
│ │ │ ├── values.rs
│ │ │ ├── view_metadata.rs
│ │ │ ├── view_metadata_builder.rs
│ │ │ └── view_version.rs
│ │ ├── table.rs
│ │ ├── transaction
│ │ │ ├── append.rs
│ │ │ ├── mod.rs
│ │ │ ├── snapshot.rs
│ │ │ └── sort_order.rs
│ │ ├── transform
│ │ │ ├── bucket.rs
│ │ │ ├── identity.rs
│ │ │ ├── mod.rs
│ │ │ ├── temporal.rs
│ │ │ ├── truncate.rs
│ │ │ └── void.rs
│ │ ├── utils.rs
│ │ └── writer
│ │ │ ├── base_writer
│ │ │ ├── data_file_writer.rs
│ │ │ ├── equality_delete_writer.rs
│ │ │ └── mod.rs
│ │ │ ├── file_writer
│ │ │ ├── location_generator.rs
│ │ │ ├── mod.rs
│ │ │ ├── parquet_writer.rs
│ │ │ └── track_writer.rs
│ │ │ └── mod.rs
│ ├── testdata
│ │ ├── avro_schema_manifest_entry.json
│ │ ├── avro_schema_manifest_file_v1.json
│ │ ├── avro_schema_manifest_file_v2.json
│ │ ├── example_empty_table_metadata_v2.json
│ │ ├── example_table_metadata_v2.json
│ │ ├── file_io_gcs
│ │ │ └── docker-compose.yaml
│ │ ├── file_io_s3
│ │ │ └── docker-compose.yaml
│ │ ├── manifests_lists
│ │ │ ├── manifest-list-v2-1.avro
│ │ │ └── manifest-list-v2-2.avro
│ │ ├── puffin
│ │ │ └── java-generated
│ │ │ │ ├── empty-puffin-uncompressed.bin
│ │ │ │ ├── sample-metric-data-compressed-zstd.bin
│ │ │ │ └── sample-metric-data-uncompressed.bin
│ │ ├── table_metadata
│ │ │ ├── TableMetadataUnsupportedVersion.json
│ │ │ ├── TableMetadataV1Compat.json
│ │ │ ├── TableMetadataV1NoValidSchema.json
│ │ │ ├── TableMetadataV1PartitionSpecsWithoutDefaultId.json
│ │ │ ├── TableMetadataV1SchemasWithoutCurrentId.json
│ │ │ ├── TableMetadataV1Valid.json
│ │ │ ├── TableMetadataV2CurrentSchemaNotFound.json
│ │ │ ├── TableMetadataV2MissingLastPartitionId.json
│ │ │ ├── TableMetadataV2MissingPartitionSpecs.json
│ │ │ ├── TableMetadataV2MissingSchemas.json
│ │ │ ├── TableMetadataV2MissingSortOrder.json
│ │ │ ├── TableMetadataV2Valid.json
│ │ │ └── TableMetadataV2ValidMinimal.json
│ │ └── view_metadata
│ │ │ ├── ViewMetadataUnsupportedVersion.json
│ │ │ ├── ViewMetadataV1CurrentVersionNotFound.json
│ │ │ ├── ViewMetadataV1MissingCurrentVersion.json
│ │ │ ├── ViewMetadataV1MissingSchema.json
│ │ │ ├── ViewMetadataV1SchemaNotFound.json
│ │ │ └── ViewMetadataV1Valid.json
│ └── tests
│ │ ├── file_io_gcs_test.rs
│ │ └── file_io_s3_test.rs
├── integration_tests
│ ├── Cargo.toml
│ ├── DEPENDENCIES.rust.tsv
│ ├── src
│ │ └── lib.rs
│ ├── testdata
│ │ ├── docker-compose.yaml
│ │ └── spark
│ │ │ ├── Dockerfile
│ │ │ ├── entrypoint.sh
│ │ │ ├── provision.py
│ │ │ └── spark-defaults.conf
│ └── tests
│ │ ├── shared.rs
│ │ └── shared_tests
│ │ ├── append_data_file_test.rs
│ │ ├── append_partition_data_file_test.rs
│ │ ├── conflict_commit_test.rs
│ │ ├── datafusion.rs
│ │ ├── mod.rs
│ │ ├── read_evolved_schema.rs
│ │ ├── read_positional_deletes.rs
│ │ └── scan_all_type.rs
├── integrations
│ ├── cache-moka
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ ├── README.md
│ │ └── src
│ │ │ └── lib.rs
│ ├── cli
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ ├── README.md
│ │ └── src
│ │ │ ├── catalog.rs
│ │ │ ├── lib.rs
│ │ │ └── main.rs
│ └── datafusion
│ │ ├── Cargo.toml
│ │ ├── DEPENDENCIES.rust.tsv
│ │ ├── README.md
│ │ ├── src
│ │ ├── catalog.rs
│ │ ├── error.rs
│ │ ├── lib.rs
│ │ ├── physical_plan
│ │ │ ├── expr_to_predicate.rs
│ │ │ ├── mod.rs
│ │ │ └── scan.rs
│ │ ├── schema.rs
│ │ └── table
│ │ │ ├── mod.rs
│ │ │ └── table_provider_factory.rs
│ │ ├── testdata
│ │ └── table_metadata
│ │ │ └── TableMetadataV2.json
│ │ └── tests
│ │ ├── integration_datafusion_test.rs
│ │ └── test_data
│ │ └── TableMetadataV2Valid.json
├── sqllogictest
│ ├── Cargo.toml
│ ├── DEPENDENCIES.rust.tsv
│ ├── README.md
│ └── src
│ │ ├── engine
│ │ ├── datafusion.rs
│ │ └── mod.rs
│ │ ├── error.rs
│ │ └── lib.rs
└── test_utils
│ ├── Cargo.toml
│ ├── DEPENDENCIES.rust.tsv
│ └── src
│ ├── cmd.rs
│ ├── docker.rs
│ └── lib.rs
├── deny.toml
├── docs
└── contributing
│ ├── orbstack.md
│ └── podman.md
├── rust-toolchain.toml
├── rustfmt.toml
└── scripts
├── dependencies.py
├── release.sh
└── verify.py
/.asf.yaml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 |
20 | # The format of this file is documented at
21 | # https://cwiki.apache.org/confluence/display/INFRA/Git+-+.asf.yaml+features
22 |
23 | github:
24 | description: "Apache Iceberg"
25 | homepage: https://rust.iceberg.apache.org/
26 | labels:
27 | - iceberg
28 | - apache
29 | - hacktoberfest
30 | - rust
31 | enabled_merge_buttons:
32 | squash: true
33 | merge: false
34 | rebase: false
35 | protected_branches:
36 | main:
37 | required_status_checks:
38 | # strict means "Require branches to be up to date before merging".
39 | strict: true
40 |
41 | required_pull_request_reviews:
42 | required_approving_review_count: 1
43 | dismiss_stale_reviews: true
44 |
45 | required_linear_history: true
46 | del_branch_on_merge: true
47 | features:
48 | wiki: false
49 | issues: true
50 | projects: true
51 | discussions: true
52 | collaborators:
53 | - JanKaul
54 | - c-thiel
55 | ghp_branch: gh-pages
56 | ghp_path: /
57 |
58 | notifications:
59 | commits: commits@iceberg.apache.org
60 | issues: issues@iceberg.apache.org
61 | pullrequests: issues@iceberg.apache.org
62 | jira_options: link label link label
63 | discussions: issues@iceberg.apache.org
64 |
--------------------------------------------------------------------------------
/.cargo/audit.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [advisories]
19 | ignore = [
20 | # rsa
21 | # Marvin Attack: potential key recovery through timing sidechannels
22 | # Issues: https://github.com/apache/iceberg-rust/issues/221
23 | "RUSTSEC-2023-0071",
24 | # `derivative` is unmaintained; consider using an alternative
25 | #
26 | # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293
27 | "RUSTSEC-2024-0388",
28 | ]
29 |
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Iceberg Rust",
3 | "image": "mcr.microsoft.com/devcontainers/rust:bullseye",
4 | "customizations": {
5 | "vscode": {
6 | "extensions": [
7 | "rust-lang.rust-analyzer"
8 | ],
9 | "settings": {
10 | "editor.formatOnSave": true,
11 | "files.exclude": {
12 | "**/LICENSE": true
13 | }
14 | }
15 | }
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | website export-ignore
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 |
20 | blank_issues_enabled: true
21 | contact_links:
22 | - name: Ask questions about iceberg-rust
23 | url: https://github.com/apache/iceberg-rust/discussions
24 | about: Please ask and answer questions here.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/iceberg_bug_report.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 |
20 | name: Iceberg Rust Bug Report 🐞
21 | description: Problems, bugs and issues with Apache Iceberg Rust
22 | labels: bug
23 |
24 | body:
25 | - type: dropdown
26 | attributes:
27 | label: Apache Iceberg Rust version
28 | description: What Apache Iceberg Rust version are you using?
29 | multiple: false
30 | options:
31 | - 0.4.0 (latest version)
32 | - 0.3.0
33 | - 0.2.0
34 | validations:
35 | required: false
36 | - type: textarea
37 | attributes:
38 | label: Describe the bug
39 | description: >
40 | Describe the problem, what to expect, and how to reproduce.
41 | You can include files by dragging and dropping them here.
42 | validations:
43 | required: true
44 | - type: textarea
45 | attributes:
46 | label: To Reproduce
47 | placeholder: >
48 | Steps to reproduce the behavior:
49 | - type: textarea
50 | attributes:
51 | label: Expected behavior
52 | placeholder: >
53 | A clear and concise description of what you expected to happen.
54 | - type: dropdown
55 | attributes:
56 | label: Willingness to contribute
57 | description: >
58 | The Apache Iceberg community encourages bug fix contributions. Would you or another member of your organization be willing to contribute a fix for this bug to the Apache Iceberg codebase?
59 | options:
60 | - I can contribute a fix for this bug independently
61 | - I would be willing to contribute a fix for this bug with guidance from the Iceberg community
62 | - I cannot contribute a fix for this bug at this time
63 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/iceberg_epic.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 |
20 | name: Iceberg Rust Epic Feature
21 | description: Create an epic issue that acts as the parent of a set of sub-issues
22 | labels: epic
23 | body:
24 | - type: textarea
25 | attributes:
26 | label: What feature are you trying to implement?
27 | description: Please describe what you are trying to do.
28 | placeholder: >
29 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
30 | (This section helps Iceberg developers understand the context and *why* for this feature, in addition to the *what*)
31 | - type: dropdown
32 | attributes:
33 | label: Willingness to contribute
34 | description: >
35 | The Apache Iceberg community encourages feature contributions. Would you or another member of your organization be willing to contribute to this feature for the Apache Iceberg Rust codebase?
36 | options:
37 | - I can contribute to this feature independently
38 | - I would be willing to contribute to this feature with guidance from the Iceberg Rust community
39 | - I cannot contribute to this feature at this time
40 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/iceberg_feature.yml:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 |
20 | name: Iceberg Rust Feature Request
21 | description: Suggest an idea for Iceberg Rust
22 | labels: enhancement
23 | body:
24 | - type: textarea
25 | attributes:
26 | label: Is your feature request related to a problem or challenge?
27 | description: Please describe what you are trying to do.
28 | placeholder: >
29 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
30 | (This section helps Iceberg developers understand the context and *why* for this feature, in addition to the *what*)
31 | - type: textarea
32 | attributes:
33 | label: Describe the solution you'd like
34 | placeholder: >
35 | A clear and concise description of what you want to happen.
36 | - type: dropdown
37 | attributes:
38 | label: Willingness to contribute
39 | description: >
40 | The Apache Iceberg community encourages feature contributions. Would you or another member of your organization be willing to contribute to this feature for the Apache Iceberg Rust codebase?
41 | options:
42 | - I can contribute to this feature independently
43 | - I would be willing to contribute to this feature with guidance from the Iceberg Rust community
44 | - I cannot contribute to this feature at this time
45 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Which issue does this PR close?
2 |
3 |
6 |
7 | - Closes #.
8 |
9 | ## What changes are included in this PR?
10 |
11 |
14 |
15 | ## Are these changes tested?
16 |
17 |
--------------------------------------------------------------------------------
/.github/actions/overwrite-package-version/action.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: 'Update Package Version'
19 | description: 'Updates pyproject.toml version with a provided timestamp'
20 | inputs:
21 | timestamp:
22 | description: 'Timestamp appended to the package version as a .dev suffix'
23 | required: true
24 | runs:
25 | using: "composite"
26 | steps:
27 | - name: Setup Python
28 | uses: actions/setup-python@v5
29 | with:
30 | python-version: '3.12'
31 |
32 | - name: Install toml
33 | run: pip install toml
34 | shell: bash
35 |
36 | - name: Get and update version
37 | shell: bash
38 | env:
39 | TIMESTAMP: ${{ inputs.timestamp }}
40 | run: |
41 | CURRENT_VERSION=$(python -c "import toml; print(toml.load('bindings/python/pyproject.toml')['project']['version'])")
42 | NEW_VERSION="${CURRENT_VERSION}.dev${TIMESTAMP}"
43 | NEW_VERSION=$NEW_VERSION python -c "
44 | import toml
45 | import os
46 | config = toml.load('bindings/python/pyproject.toml')
47 | config['project']['version'] = os.environ['NEW_VERSION']
48 | with open('bindings/python/pyproject.toml', 'w') as f:
49 | toml.dump(config, f)
50 | print(f'Updated version to: {config[\"project\"][\"version\"]}')
51 | "
--------------------------------------------------------------------------------
/.github/actions/setup-builder/action.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # This file is heavily inspired by
19 | # [datafusion](https://github.com/apache/datafusion/blob/main/.github/actions/setup-builder/action.yaml).
20 | name: Prepare Rust Builder
21 | description: 'Prepare Rust Build Environment'
22 | inputs:
23 | rust-version:
24 | description: 'version of rust to install and use'
25 | runs:
26 | using: "composite"
27 | steps:
28 | - name: Setup specified Rust toolchain
29 | shell: bash
30 | if: ${{ inputs.rust-version != '' }}
31 | env:
32 | RUST_VERSION: ${{ inputs.rust-version }}
33 | run: |
34 | echo "Installing ${RUST_VERSION}"
35 | rustup toolchain install ${RUST_VERSION}
36 | rustup override set ${RUST_VERSION}
37 | rustup component add rustfmt clippy
38 | - name: Setup Rust toolchain according to rust-toolchain.toml
39 | shell: bash
40 | if: ${{ inputs.rust-version == '' }}
41 | run: |
42 | echo "Installing toolchain according to rust-toolchain.toml"
43 | rustup show
44 | rustup component add rustfmt clippy
45 | - name: Fixup git permissions
46 | # https://github.com/actions/checkout/issues/766
47 | shell: bash
48 | run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | version: 2
19 | updates:
20 | # Maintain dependencies for GitHub Actions
21 | - package-ecosystem: "github-actions"
22 | directory: "/"
23 | schedule:
24 | interval: "weekly"
25 | day: "sunday"
26 |
27 | # Maintain dependencies for iceberg
28 | - package-ecosystem: "cargo"
29 | directory: "/"
30 | schedule:
31 | interval: "weekly"
32 | day: "sunday"
33 | open-pull-requests-limit: 50
34 | versioning-strategy: lockfile-only
35 | ignore:
36 | # For all packages, ignore all patch updates
37 | - dependency-name: "*"
38 | update-types: ["version-update:semver-patch"]
39 | groups:
40 | arrow-parquet:
41 | applies-to: version-updates
42 | patterns:
43 | - "arrow*"
44 | - "parquet"
45 |
--------------------------------------------------------------------------------
/.github/workflows/audit.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Security audit
19 |
20 | concurrency:
21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
22 | cancel-in-progress: true
23 |
24 | on:
25 | push:
26 | paths:
27 | - "**/Cargo.toml"
28 | - "**/Cargo.lock"
29 |
30 | pull_request:
31 | paths:
32 | - "**/Cargo.toml"
33 | - "**/Cargo.lock"
34 | - ".github/workflows/audit.yml"
35 |
36 | schedule:
37 | - cron: '0 0 * * *'
38 |
39 | jobs:
40 | security_audit:
41 | runs-on: ubuntu-latest
42 | steps:
43 | - uses: actions/checkout@v4
44 | - name: Setup Rust toolchain
45 | uses: ./.github/actions/setup-builder
46 | with:
47 | rust-version: stable
48 | - uses: rustsec/audit-check@v2.0.0
49 | with:
50 | token: ${{ secrets.GITHUB_TOKEN }}
51 | ignore: RUSTSEC-2024-0436
52 |
--------------------------------------------------------------------------------
/.github/workflows/bindings_python_ci.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Bindings Python CI
19 |
20 | on:
21 | push:
22 | branches:
23 | - main
24 | pull_request:
25 | branches:
26 | - main
27 |
28 | concurrency:
29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
30 | cancel-in-progress: true
31 |
32 | jobs:
33 | check-rust:
34 | runs-on: ubuntu-latest
35 | steps:
36 | - uses: actions/checkout@v4
37 | - name: Check format
38 | working-directory: "bindings/python"
39 | run: cargo fmt --all -- --check
40 | - name: Check clippy
41 | working-directory: "bindings/python"
42 | run: cargo clippy --all-targets --all-features -- -D warnings
43 |
44 | check-python:
45 | runs-on: ubuntu-latest
46 | steps:
47 | - uses: actions/checkout@v4
48 | - name: Install tools
49 | run: |
50 | pip install ruff
51 | - name: Check format
52 | working-directory: "bindings/python"
53 | run: |
54 | ruff format . --diff
55 | - name: Check style
56 | working-directory: "bindings/python"
57 | run: |
58 | ruff check .
59 |
60 | test:
61 | runs-on: ${{ matrix.os }}
62 | strategy:
63 | matrix:
64 | os:
65 | - ubuntu-latest
66 | - macos-latest
67 | - windows-latest
68 | steps:
69 | - uses: actions/checkout@v4
70 | - uses: actions/setup-python@v5
71 | with:
72 | python-version: 3.9
73 | - uses: PyO3/maturin-action@v1
74 | with:
75 | working-directory: "bindings/python"
76 | command: build
77 | args: --out dist --sdist
78 | - name: Run tests
79 | working-directory: "bindings/python"
80 | shell: bash
81 | run: |
82 | set -e
83 | pip install hatch==1.12.0
84 | hatch run dev:pip install dist/pyiceberg_core-*.whl --force-reinstall
85 | hatch run dev:test
86 |
--------------------------------------------------------------------------------
/.github/workflows/ci_typos.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Typos Check
19 |
20 | on:
21 | push:
22 | branches:
23 | - main
24 | pull_request:
25 | branches:
26 | - main
27 |
28 | concurrency:
29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
30 | cancel-in-progress: true
31 |
32 | env:
33 | RUST_BACKTRACE: 1
34 |
35 | jobs:
36 | typos-check:
37 | name: typos check
38 | runs-on: ubuntu-latest
39 | timeout-minutes: 10
40 | env:
41 | FORCE_COLOR: 1
42 | steps:
43 | - uses: actions/checkout@v4
44 | - name: Check typos
45 | uses: crate-ci/typos@v1.32.0
46 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Publish
19 |
20 | on:
21 | push:
22 | tags:
23 | - "*"
24 | workflow_dispatch:
25 |
26 | env:
27 | rust_msrv: "1.85"
28 |
29 | jobs:
30 | publish:
31 | runs-on: ubuntu-latest
32 | strategy:
33 | # Publish package one by one instead of flooding the registry
34 | max-parallel: 1
35 | matrix:
36 | # Order here is sensitive, as it will be used to determine the order of publishing
37 | package:
38 | - "crates/iceberg"
39 | - "crates/catalog/glue"
40 | - "crates/catalog/hms"
41 | - "crates/catalog/memory"
42 | - "crates/catalog/rest"
43 | # sql is not ready for release yet.
44 | # - "crates/catalog/sql"
45 | - "crates/integrations/datafusion"
46 | steps:
47 | - uses: actions/checkout@v4
48 |
49 | - name: Setup Rust toolchain
50 | uses: ./.github/actions/setup-builder
51 | with:
52 | rust-version: ${{ env.rust_msrv }}
53 |
54 | - name: Publish ${{ matrix.package }}
55 | working-directory: ${{ matrix.package }}
56 | # Only publish if it's a tag and the tag is not a pre-release
57 | if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }}
58 | run: cargo publish --all-features
59 | env:
60 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
61 |
--------------------------------------------------------------------------------
/.github/workflows/website.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Website
19 |
20 | on:
21 | push:
22 | branches:
23 | - main
24 | pull_request:
25 | branches:
26 | - main
27 |
28 | concurrency:
29 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
30 | cancel-in-progress: true
31 |
32 | jobs:
33 | build:
34 | runs-on: ubuntu-latest
35 | permissions:
36 | contents: write
37 | steps:
38 | - uses: actions/checkout@v4
39 |
40 | - name: Setup mdBook
41 | uses: peaceiris/actions-mdbook@v2
42 | with:
43 | mdbook-version: '0.4.36'
44 |
45 | - name: Build
46 | working-directory: website
47 | run: mdbook build
48 |
49 | - name: Copy asf file
50 | run: cp .asf.yaml ./website/book/.asf.yaml
51 |
52 | - name: Build API docs
53 | run: |
54 | cargo doc --no-deps --workspace --all-features
55 | cp -r target/doc ./website/book/api
56 |
57 | - name: Deploy to gh-pages
58 | uses: peaceiris/actions-gh-pages@v4.0.0
59 | if: github.event_name == 'push' && github.ref_name == 'main'
60 | with:
61 | github_token: ${{ secrets.GITHUB_TOKEN }}
62 | publish_dir: website/book
63 | publish_branch: gh-pages
64 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | target
19 | .idea
20 | .vscode
21 | .zed
22 | **/.DS_Store
23 | dist/*
24 | **/venv
25 | *.so
26 | *.pyc
27 | *.whl
28 | *.tar.gz
29 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 |
19 | !.gitignore
20 | !vcs.xml
21 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
16 |
17 |
18 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/.licenserc.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | header:
19 | license:
20 | spdx-id: Apache-2.0
21 | copyright-owner: Apache Software Foundation
22 |
23 | paths-ignore:
24 | - 'LICENSE'
25 | - 'NOTICE'
26 | - '.gitattributes'
27 | - '**/*.json'
28 | # Generated content by mdbook
29 | - 'website/book'
30 | # Generated content by scripts
31 | - '**/DEPENDENCIES.*.tsv'
32 | # Release distributions
33 | - 'dist/*'
34 | - 'Cargo.lock'
35 | - '.github/PULL_REQUEST_TEMPLATE.md'
36 | comment: on-failure
37 |
--------------------------------------------------------------------------------
/.taplo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | include = ["Cargo.toml", "**/*.toml"]
19 |
20 | [formatting]
21 | # Align consecutive entries vertically.
22 | align_entries = false
23 | # Append trailing commas for multi-line arrays.
24 | array_trailing_comma = true
25 | # Expand arrays to multiple lines that exceed the maximum column width.
26 | array_auto_expand = true
27 | # Collapse arrays that don't exceed the maximum column width and don't contain comments.
28 | array_auto_collapse = true
29 | # Omit white space padding from single-line arrays
30 | compact_arrays = true
31 | # Omit white space padding from the start and end of inline tables.
32 | compact_inline_tables = false
33 | # Maximum column width in characters, affects array expansion and collapse, this doesn't take whitespace into account.
34 | # Note that this is not set in stone, and works on a best-effort basis.
35 | column_width = 80
36 | # Indent based on tables and arrays of tables and their subtables, subtables out of order are not indented.
37 | indent_tables = false
38 | # The substring that is used for indentation, should be tabs or spaces (but technically can be anything).
39 | indent_string = ' '
40 | # Add trailing newline at the end of the file if not present.
41 | trailing_newline = true
42 | # Alphabetically reorder keys that are not separated by empty lines.
43 | reorder_keys = true
44 | # Maximum amount of allowed consecutive blank lines. This does not affect the whitespace at the end of the document, as it is always stripped.
45 | allowed_blank_lines = 1
46 | # Use CRLF for line endings.
47 | crlf = false
48 |
--------------------------------------------------------------------------------
/.typos.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [files]
19 | extend-exclude = ["**/testdata", "CHANGELOG.md"]
20 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | .EXPORT_ALL_VARIABLES:
19 |
20 | build:
21 | cargo build --all-targets --all-features --workspace
22 |
23 | check-fmt:
24 | cargo fmt --all -- --check
25 |
26 | check-clippy:
27 | cargo clippy --all-targets --all-features --workspace -- -D warnings
28 |
29 | install-cargo-machete:
30 | cargo install cargo-machete@0.7.0
31 |
32 | cargo-machete: install-cargo-machete
33 | cargo machete
34 |
35 | install-taplo-cli:
36 | cargo install taplo-cli@0.9.3
37 |
38 | fix-toml: install-taplo-cli
39 | taplo fmt
40 |
41 | check-toml: install-taplo-cli
42 | taplo check
43 |
44 | check: check-fmt check-clippy check-toml cargo-machete
45 |
46 | doc-test:
47 | cargo test --no-fail-fast --doc --all-features --workspace
48 |
49 | unit-test: doc-test
50 | cargo test --no-fail-fast --lib --all-features --workspace
51 |
52 | test: doc-test
53 | cargo test --no-fail-fast --all-targets --all-features --workspace
54 |
55 | clean:
56 | cargo clean
57 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Apache Iceberg Rust
2 | Copyright 2023-2024 The Apache Software Foundation
3 |
4 | This product includes software developed at
5 | The Apache Software Foundation (http://www.apache.org/).
6 |
--------------------------------------------------------------------------------
/bindings/python/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = "2024"
20 | homepage = "https://rust.iceberg.apache.org"
21 | name = "pyiceberg_core_rust"
22 | rust-version = "1.85"
23 | version = "0.5.1"
24 | # This crate is used to build python bindings, we don't want to publish it
25 | publish = false
26 |
27 | keywords = ["iceberg"]
28 | license = "Apache-2.0"
29 |
30 | [lib]
31 | crate-type = ["cdylib"]
32 |
33 | [dependencies]
34 | arrow = { version = "55", features = ["pyarrow", "chrono-tz"] }
35 | iceberg = { path = "../../crates/iceberg" }
36 | pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
37 | iceberg-datafusion = { path = "../../crates/integrations/datafusion" }
38 | datafusion-ffi = { version = "47" }
39 | tokio = { version = "1.44", default-features = false }
40 |
--------------------------------------------------------------------------------
/bindings/python/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Pyiceberg Core
21 |
22 | This project is used to build an iceberg-rust powered core for pyiceberg.
23 |
24 | ## Setup
25 |
26 | ```shell
27 | pip install hatch==1.12.0
28 | ```
29 |
30 | ## Build
31 |
32 | ```shell
33 | hatch run dev:develop
34 | ```
35 |
36 | ## Test
37 |
38 | ```shell
39 | hatch run dev:test
40 | ```
--------------------------------------------------------------------------------
/bindings/python/project-description.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Pyiceberg Core
21 |
22 | This project is used to build an iceberg-rust powered core for pyiceberg, and is intended for use only by pyiceberg.
23 |
24 | Install via PyPI:
25 |
26 | ```
27 | pip install pyiceberg-core
28 | ```
29 |
--------------------------------------------------------------------------------
/bindings/python/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [build-system]
19 | build-backend = "maturin"
20 | requires = ["maturin>=1.0,<2.0"]
21 |
22 | [project]
23 | classifiers = [
24 | "Development Status :: 4 - Beta",
25 | "Intended Audience :: Developers",
26 | "License :: OSI Approved :: Apache Software License",
27 | "Operating System :: OS Independent",
28 | "Programming Language :: Python :: 3.9",
29 | "Programming Language :: Python :: 3.10",
30 | "Programming Language :: Python :: 3.11",
31 | "Programming Language :: Python :: 3.12",
32 | ]
33 | name = "pyiceberg-core"
34 | readme = "project-description.md"
35 | requires-python = "~=3.9"
36 | dynamic = ["version"]
37 |
38 | [tool.maturin]
39 | features = ["pyo3/extension-module"]
40 | module-name = "pyiceberg_core.pyiceberg_core_rust"
41 | python-source = "python"
42 |
43 | [tool.ruff.lint]
44 | ignore = ["F403", "F405"]
45 |
46 | [tool.hatch.envs.dev]
47 | dependencies = ["maturin>=1.0,<2.0", "pytest>=8.3.2", "pyarrow>=17.0.0", "datafusion>=45", "pyiceberg[sql-sqlite]>=0.9.1"]
48 |
49 | [tool.hatch.envs.dev.scripts]
50 | build = "maturin build --out dist --sdist"
51 | develop = "maturin develop"
52 | test = "pytest"
53 |
--------------------------------------------------------------------------------
/bindings/python/python/pyiceberg_core/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | from .pyiceberg_core_rust import *
19 |
20 | __doc__ = pyiceberg_core_rust.__doc__  # reuse the native extension module's docstring for this package
21 | __all__ = pyiceberg_core_rust.__all__  # export exactly the names the native extension module declares
22 |
--------------------------------------------------------------------------------
/bindings/python/src/error.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use pyo3::PyErr;
19 | use pyo3::exceptions::PyValueError;
20 |
21 | /// Convert an iceberg error to a python error
22 | pub fn to_py_err(err: iceberg::Error) -> PyErr {
23 | PyValueError::new_err(err.to_string())
24 | }
25 |
--------------------------------------------------------------------------------
/bindings/python/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use pyo3::prelude::*;
19 |
20 | mod datafusion_table_provider;
21 | mod error;
22 | mod runtime;
23 | mod transform;
24 |
25 | #[pymodule] // native extension module; the Python package's __init__.py star-imports its contents
26 | fn pyiceberg_core_rust(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
27 | datafusion_table_provider::register_module(py, m)?; // on failure the error is returned and nothing further is registered
28 | transform::register_module(py, m)?; // registered second, after the table-provider module
29 | Ok(())
30 | }
31 |
--------------------------------------------------------------------------------
/bindings/python/src/runtime.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::sync::OnceLock;
19 |
20 | use tokio::runtime::{Handle, Runtime};
21 |
22 | static RUNTIME: OnceLock<Runtime> = OnceLock::new(); // fallback runtime, built on first use and then shared
23 | /// Handle to the ambient tokio runtime, or to a lazily-built shared fallback one (panics if that cannot be built).
24 | pub fn runtime() -> Handle {
25 |     match Handle::try_current() {
26 |         Ok(handle) => handle, // already an owned handle, so no clone is needed
27 |         Err(_) => {
28 |             let rt = RUNTIME.get_or_init(|| Runtime::new().expect("failed to build fallback tokio runtime"));
29 |             rt.handle().clone()
30 |         }
31 |     }
32 | }
33 |
--------------------------------------------------------------------------------
/crates/catalog/glue/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = { workspace = true }
20 | homepage = { workspace = true }
21 | name = "iceberg-catalog-glue"
22 | rust-version = { workspace = true }
23 | version = { workspace = true }
24 |
25 | categories = ["database"]
26 | description = "Apache Iceberg Glue Catalog Support"
27 | keywords = ["iceberg", "glue", "catalog"]
28 | license = { workspace = true }
29 | repository = { workspace = true }
30 |
31 | [dependencies]
32 | anyhow = { workspace = true }
33 | async-trait = { workspace = true }
34 | aws-config = { workspace = true }
35 | aws-sdk-glue = { workspace = true }
36 | iceberg = { workspace = true }
37 | serde_json = { workspace = true }
38 | tokio = { workspace = true }
39 | tracing = { workspace = true }
40 | typed-builder = { workspace = true }
41 | uuid = { workspace = true }
42 |
43 | [dev-dependencies]
44 | ctor = { workspace = true }
45 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
46 | port_scanner = { workspace = true }
47 |
--------------------------------------------------------------------------------
/crates/catalog/glue/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Apache Iceberg Glue Catalog Official Native Rust Implementation
21 |
22 | [crates.io](https://crates.io/crates/iceberg-catalog-glue)
23 | [docs.rs](https://docs.rs/iceberg-catalog-glue/latest)
24 |
25 | This crate contains the official Native Rust implementation of Apache Iceberg Glue Catalog.
26 |
27 | See the [API documentation](https://docs.rs/iceberg-catalog-glue/latest) for examples and the full API.
28 |
--------------------------------------------------------------------------------
/crates/catalog/glue/src/error.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::fmt::Debug;
19 |
20 | use anyhow::anyhow;
21 | use iceberg::{Error, ErrorKind};
22 |
23 | /// Format AWS SDK error into iceberg error
24 | pub(crate) fn from_aws_sdk_error(error: aws_sdk_glue::error::SdkError) -> Error
25 | where T: Debug {
26 | Error::new(
27 | ErrorKind::Unexpected,
28 | "Operation failed for hitting aws sdk error".to_string(),
29 | )
30 | .with_source(anyhow!("aws sdk error: {:?}", error))
31 | }
32 |
33 | /// Format AWS Build error into iceberg error
34 | pub(crate) fn from_aws_build_error(error: aws_sdk_glue::error::BuildError) -> Error {
35 | Error::new(
36 | ErrorKind::Unexpected,
37 | "Operation failed for hitting aws build error".to_string(),
38 | )
39 | .with_source(anyhow!("aws build error: {:?}", error))
40 | }
41 |
--------------------------------------------------------------------------------
/crates/catalog/glue/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Iceberg Glue Catalog implementation.
19 |
20 | #![deny(missing_docs)]
21 |
22 | mod catalog;
23 | mod error;
24 | mod schema;
25 | mod utils;
26 | pub use catalog::*;
27 | pub use utils::{
28 | AWS_ACCESS_KEY_ID, AWS_PROFILE_NAME, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN,
29 | };
30 |
--------------------------------------------------------------------------------
/crates/catalog/glue/testdata/glue_catalog/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | services:
19 | minio:
20 | image: minio/minio:RELEASE.2025-05-24T17-08-30Z
21 | expose:
22 | - 9000
23 | - 9001
24 | environment:
25 | - MINIO_ROOT_USER=admin
26 | - MINIO_ROOT_PASSWORD=password
27 | - MINIO_DOMAIN=minio
28 | command: [ "server", "/data", "--console-address", ":9001" ]
29 |
30 | mc:
31 | depends_on:
32 | - minio
33 | image: minio/mc:RELEASE.2025-05-21T01-59-54Z
34 | environment:
35 | - AWS_ACCESS_KEY_ID=admin
36 | - AWS_SECRET_ACCESS_KEY=password
37 | - AWS_REGION=us-east-1
38 | entrypoint: >
39 | /bin/sh -c " until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; tail -f /dev/null "
40 |
41 | moto:
42 | image: motoserver/moto:5.0.3
43 | expose:
44 | - 5000
45 |
--------------------------------------------------------------------------------
/crates/catalog/hms/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = { workspace = true }
20 | homepage = { workspace = true }
21 | name = "iceberg-catalog-hms"
22 | rust-version = { workspace = true }
23 | version = { workspace = true }
24 |
25 | categories = ["database"]
26 | description = "Apache Iceberg Hive Metastore Catalog Support"
27 | keywords = ["iceberg", "hive", "catalog"]
28 | license = { workspace = true }
29 | repository = { workspace = true }
30 |
31 | [dependencies]
32 | anyhow = { workspace = true }
33 | async-trait = { workspace = true }
34 | chrono = { workspace = true }
35 | hive_metastore = { workspace = true }
36 | iceberg = { workspace = true }
37 | pilota = { workspace = true }
38 | serde_json = { workspace = true }
39 | tokio = { workspace = true }
40 | tracing = { workspace = true }
41 | typed-builder = { workspace = true }
42 | uuid = { workspace = true }
43 | volo-thrift = { workspace = true }
44 |
45 | # Transitive dependencies below
46 |
47 | # Some dependencies don't correctly specify a minimal version for their dependencies and will fail to build in minimal versions build.
48 | # So we specify the version of these transitive dependencies here.
49 | # They can be removed when the direct dependencies are updated.
50 |
51 | # transitive dependencies of pilota/volo-thrift
52 | faststr = { workspace = true }
53 | linkedbytes = { workspace = true }
54 | metainfo = { workspace = true }
55 | motore-macros = { workspace = true }
56 | volo = { workspace = true }
57 |
58 | [dev-dependencies]
59 | ctor = { workspace = true }
60 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
61 | port_scanner = { workspace = true }
62 |
63 | [package.metadata.cargo-machete]
64 | # These dependencies are added to ensure minimal dependency version
65 | ignored = ["faststr", "linkedbytes", "metainfo", "volo", "motore-macros"]
66 |
--------------------------------------------------------------------------------
/crates/catalog/hms/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Apache Iceberg HiveMetaStore Catalog Official Native Rust Implementation
21 |
22 | [crates.io](https://crates.io/crates/iceberg-catalog-hms)
23 | [docs.rs](https://docs.rs/iceberg-catalog-hms/latest)
24 |
25 | This crate contains the official Native Rust implementation of Apache Iceberg HiveMetaStore Catalog.
26 |
27 | See the [API documentation](https://docs.rs/iceberg-catalog-hms/latest) for examples and the full API.
28 |
--------------------------------------------------------------------------------
/crates/catalog/hms/src/error.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::fmt::Debug;
19 | use std::io;
20 |
21 | use anyhow::anyhow;
22 | use iceberg::{Error, ErrorKind};
23 | use volo_thrift::MaybeException;
24 |
25 | /// Format a thrift error into iceberg error.
26 | ///
27 | /// Please only throw this error when you are sure that the error is caused by thrift.
28 | pub fn from_thrift_error(error: impl std::error::Error) -> Error {
29 | Error::new(
30 | ErrorKind::Unexpected,
31 | "Operation failed for hitting thrift error".to_string(),
32 | )
33 | .with_source(anyhow!("thrift error: {:?}", error))
34 | }
35 |
36 | /// Format a thrift exception into iceberg error.
37 | pub fn from_thrift_exception(value: MaybeException) -> Result {
38 | match value {
39 | MaybeException::Ok(v) => Ok(v),
40 | MaybeException::Exception(err) => Err(Error::new(
41 | ErrorKind::Unexpected,
42 | "Operation failed for hitting thrift error".to_string(),
43 | )
44 | .with_source(anyhow!("thrift error: {:?}", err))),
45 | }
46 | }
47 |
48 | /// Format an io error into iceberg error.
49 | pub fn from_io_error(error: io::Error) -> Error {
50 | Error::new(
51 | ErrorKind::Unexpected,
52 | "Operation failed for hitting io error".to_string(),
53 | )
54 | .with_source(error)
55 | }
56 |
--------------------------------------------------------------------------------
/crates/catalog/hms/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Iceberg Hive Metastore Catalog implementation.
19 |
20 | #![deny(missing_docs)]
21 |
22 | mod catalog;
23 | pub use catalog::*;
24 |
25 | mod error;
26 | mod schema;
27 | mod utils;
28 |
--------------------------------------------------------------------------------
/crates/catalog/hms/testdata/hms_catalog/Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | FROM --platform=$BUILDPLATFORM openjdk:8-jre-slim AS build
17 |
18 | ARG BUILDPLATFORM
19 |
20 | RUN apt-get update -qq && apt-get -qq -y install curl
21 |
22 | ENV AWSSDK_VERSION=2.20.18
23 | ENV HADOOP_VERSION=3.1.0
24 |
25 | RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.271/aws-java-sdk-bundle-1.11.271.jar -Lo /tmp/aws-java-sdk-bundle-1.11.271.jar
26 | RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /tmp/hadoop-aws-${HADOOP_VERSION}.jar
27 |
28 |
29 | FROM apache/hive:3.1.3
30 |
31 | ENV AWSSDK_VERSION=2.20.18
32 | ENV HADOOP_VERSION=3.1.0
33 |
34 | COPY --from=build /tmp/hadoop-aws-${HADOOP_VERSION}.jar /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar
35 | COPY --from=build /tmp/aws-java-sdk-bundle-1.11.271.jar /opt/hive/lib/aws-java-sdk-bundle-1.11.271.jar
36 | COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml
--------------------------------------------------------------------------------
/crates/catalog/hms/testdata/hms_catalog/core-site.xml:
--------------------------------------------------------------------------------
1 |
17 |
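18 | <configuration>
19 |   <property>
20 |     <name>fs.defaultFS</name>
21 |     <value>s3a://warehouse/hive</value>
22 |   </property>
23 |   <property>
24 |     <name>fs.s3a.impl</name>
25 |     <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
26 |   </property>
27 |   <property>
28 |     <name>fs.s3a.fast.upload</name>
29 |     <value>true</value>
30 |   </property>
31 |   <property>
32 |     <name>fs.s3a.endpoint</name>
33 |     <value>http://minio:9000</value>
34 |   </property>
35 |   <property>
36 |     <name>fs.s3a.access.key</name>
37 |     <value>admin</value>
38 |   </property>
39 |   <property>
40 |     <name>fs.s3a.secret.key</name>
41 |     <value>password</value>
42 |   </property>
43 |   <property>
44 |     <name>fs.s3a.connection.ssl.enabled</name>
45 |     <value>false</value>
46 |   </property>
47 |   <property>
48 |     <name>fs.s3a.path.style.access</name>
49 |     <value>true</value>
50 |   </property>
51 | </configuration>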
--------------------------------------------------------------------------------
/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | services:
19 | minio:
20 | image: minio/minio:RELEASE.2025-05-24T17-08-30Z
21 | expose:
22 | - 9000
23 | - 9001
24 | environment:
25 | - MINIO_ROOT_USER=admin
26 | - MINIO_ROOT_PASSWORD=password
27 | - MINIO_DOMAIN=minio
28 | command: [ "server", "/data", "--console-address", ":9001" ]
29 |
30 | mc:
31 | depends_on:
32 | - minio
33 | image: minio/mc:RELEASE.2025-05-21T01-59-54Z
34 | environment:
35 | - AWS_ACCESS_KEY_ID=admin
36 | - AWS_SECRET_ACCESS_KEY=password
37 | - AWS_REGION=us-east-1
38 | entrypoint: >
39 | /bin/sh -c " until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; tail -f /dev/null "
40 |
41 | hive-metastore:
42 | image: iceberg-hive-metastore
43 | build: ./
44 | platform: ${DOCKER_DEFAULT_PLATFORM}
45 | expose:
46 | - 9083
47 | environment:
48 | SERVICE_NAME: "metastore"
49 | SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"
50 |
--------------------------------------------------------------------------------
/crates/catalog/memory/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = { workspace = true }
20 | homepage = { workspace = true }
21 | name = "iceberg-catalog-memory"
22 | rust-version = { workspace = true }
23 | version = { workspace = true }
24 |
25 | categories = ["database"]
26 | description = "Apache Iceberg Rust Memory Catalog API"
27 | keywords = ["iceberg", "memory", "catalog"]
28 | license = { workspace = true }
29 | repository = { workspace = true }
30 |
31 | [dependencies]
32 | async-trait = { workspace = true }
33 | futures = { workspace = true }
34 | iceberg = { workspace = true }
35 | itertools = { workspace = true }
36 | serde_json = { workspace = true }
37 | uuid = { workspace = true, features = ["v4"] }
38 |
39 | [dev-dependencies]
40 | regex = { workspace = true }
41 | tempfile = { workspace = true }
42 | tokio = { workspace = true }
43 |
--------------------------------------------------------------------------------
/crates/catalog/memory/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Apache Iceberg Memory Catalog Official Native Rust Implementation
21 |
22 | [crates.io](https://crates.io/crates/iceberg-catalog-memory)
23 | [docs.rs](https://docs.rs/iceberg-catalog-memory/latest)
24 |
25 | This crate contains the official Native Rust implementation of Apache Iceberg Memory Catalog.
26 |
27 | See the [API documentation](https://docs.rs/iceberg-catalog-memory/latest) for examples and the full API.
28 |
--------------------------------------------------------------------------------
/crates/catalog/memory/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Iceberg memory Catalog API implementation.
19 |
20 | #![deny(missing_docs)]
21 |
22 | mod catalog;
23 | mod namespace_state;
24 |
25 | pub use catalog::*;
26 |
--------------------------------------------------------------------------------
/crates/catalog/rest/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = { workspace = true }
20 | homepage = { workspace = true }
21 | name = "iceberg-catalog-rest"
22 | rust-version = { workspace = true }
23 | version = { workspace = true }
24 |
25 | categories = ["database"]
26 | description = "Apache Iceberg Rust REST API"
27 | keywords = ["iceberg", "rest", "catalog"]
28 | license = { workspace = true }
29 | repository = { workspace = true }
30 |
31 | [dependencies]
32 | async-trait = { workspace = true }
33 | chrono = { workspace = true }
34 | http = { workspace = true }
35 | iceberg = { workspace = true }
36 | itertools = { workspace = true }
37 | reqwest = { workspace = true }
38 | serde = { workspace = true }
39 | serde_derive = { workspace = true }
40 | serde_json = { workspace = true }
41 | tokio = { workspace = true, features = ["sync"] }
42 | tracing = { workspace = true }
43 | typed-builder = { workspace = true }
44 | uuid = { workspace = true, features = ["v4"] }
45 |
46 | [dev-dependencies]
47 | ctor = { workspace = true }
48 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
49 | mockito = { workspace = true }
50 | port_scanner = { workspace = true }
51 | tokio = { workspace = true }
52 |
--------------------------------------------------------------------------------
/crates/catalog/rest/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Apache Iceberg Rest Catalog Official Native Rust Implementation
21 |
22 | [crates.io](https://crates.io/crates/iceberg-catalog-rest)
23 | [docs.rs](https://docs.rs/iceberg-catalog-rest/latest)
24 |
25 | This crate contains the official Native Rust implementation of Apache Iceberg Rest Catalog.
26 |
27 | See the [API documentation](https://docs.rs/iceberg-catalog-rest/latest) for examples and the full API.
28 |
--------------------------------------------------------------------------------
/crates/catalog/rest/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Iceberg REST API implementation.
19 |
20 | #![deny(missing_docs)]
21 |
22 | mod catalog;
23 | mod client;
24 | mod types;
25 |
26 | pub use catalog::*;
27 |
--------------------------------------------------------------------------------
/crates/catalog/rest/testdata/create_table_response.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata-location": "s3://warehouse/database/table/metadata.json",
3 | "metadata": {
4 | "format-version": 1,
5 | "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
6 | "location": "s3://warehouse/database/table",
7 | "last-updated-ms": 1657810967051,
8 | "last-column-id": 3,
9 | "schema": {
10 | "type": "struct",
11 | "schema-id": 0,
12 | "identifier-field-ids": [2],
13 | "fields": [
14 | {"id": 1, "name": "foo", "required": false, "type": "string"},
15 | {"id": 2, "name": "bar", "required": true, "type": "int"},
16 | {"id": 3, "name": "baz", "required": false, "type": "boolean"}
17 | ]
18 | },
19 | "current-schema-id": 0,
20 | "schemas": [
21 | {
22 | "type": "struct",
23 | "schema-id": 0,
24 | "identifier-field-ids": [2],
25 | "fields": [
26 | {"id": 1, "name": "foo", "required": false, "type": "string"},
27 | {"id": 2, "name": "bar", "required": true, "type": "int"},
28 | {"id": 3, "name": "baz", "required": false, "type": "boolean"}
29 | ]
30 | }
31 | ],
32 | "partition-spec": [],
33 | "default-spec-id": 0,
34 | "last-partition-id": 999,
35 | "default-sort-order-id": 0,
36 | "sort-orders": [{"order-id": 0, "fields": []}],
37 | "properties": {
38 | "write.delete.parquet.compression-codec": "zstd",
39 | "write.metadata.compression-codec": "gzip",
40 | "write.summary.partition-limit": "100",
41 | "write.parquet.compression-codec": "zstd"
42 | },
43 | "current-snapshot-id": -1,
44 | "refs": {},
45 | "snapshots": [],
46 | "snapshot-log": [],
47 | "metadata-log": []
48 | },
49 | "config": {
50 | "client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory",
51 | "region": "us-west-2"
52 | }
53 | }
--------------------------------------------------------------------------------
/crates/catalog/rest/testdata/load_table_response.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata-location": "s3://warehouse/database/table/metadata/00001-5f2f8166-244c-4eae-ac36-384ecdec81fc.gz.metadata.json",
3 | "metadata": {
4 | "format-version": 1,
5 | "table-uuid": "b55d9dda-6561-423a-8bfc-787980ce421f",
6 | "location": "s3://warehouse/database/table",
7 | "last-updated-ms": 1646787054459,
8 | "last-column-id": 2,
9 | "schema": {
10 | "type": "struct",
11 | "schema-id": 0,
12 | "fields": [
13 | {"id": 1, "name": "id", "required": false, "type": "int"},
14 | {"id": 2, "name": "data", "required": false, "type": "string"}
15 | ]
16 | },
17 | "current-schema-id": 0,
18 | "schemas": [
19 | {
20 | "type": "struct",
21 | "schema-id": 0,
22 | "fields": [
23 | {"id": 1, "name": "id", "required": false, "type": "int"},
24 | {"id": 2, "name": "data", "required": false, "type": "string"}
25 | ]
26 | }
27 | ],
28 | "partition-spec": [],
29 | "default-spec-id": 0,
30 | "partition-specs": [{"spec-id": 0, "fields": []}],
31 | "last-partition-id": 999,
32 | "default-sort-order-id": 0,
33 | "sort-orders": [{"order-id": 0, "fields": []}],
34 | "properties": {"owner": "bryan", "write.metadata.compression-codec": "gzip"},
35 | "current-snapshot-id": 3497810964824022504,
36 | "refs": {"main": {"snapshot-id": 3497810964824022504, "type": "branch"}},
37 | "snapshots": [
38 | {
39 | "snapshot-id": 3497810964824022504,
40 | "timestamp-ms": 1646787054459,
41 | "summary": {
42 | "operation": "append",
43 | "spark.app.id": "local-1646787004168",
44 | "added-data-files": "1",
45 | "added-records": "1",
46 | "added-files-size": "697",
47 | "changed-partition-count": "1",
48 | "total-records": "1",
49 | "total-files-size": "697",
50 | "total-data-files": "1",
51 | "total-delete-files": "0",
52 | "total-position-deletes": "0",
53 | "total-equality-deletes": "0"
54 | },
55 | "manifest-list": "s3://warehouse/database/table/metadata/snap-3497810964824022504-1-c4f68204-666b-4e50-a9df-b10c34bf6b82.avro",
56 | "schema-id": 0
57 | }
58 | ],
59 | "snapshot-log": [{"timestamp-ms": 1646787054459, "snapshot-id": 3497810964824022504}],
60 | "metadata-log": [
61 | {
62 | "timestamp-ms": 1646787031514,
63 | "metadata-file": "s3://warehouse/database/table/metadata/00000-88484a1c-00e5-4a07-a787-c0e7aeffa805.gz.metadata.json"
64 | }
65 | ]
66 | },
67 | "config": {"client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory", "region": "us-west-2"}
68 | }
--------------------------------------------------------------------------------
/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | networks:
19 | rest_bridge:
20 |
21 | services:
22 | rest:
23 | image: apache/iceberg-rest-fixture
24 | environment:
25 | - AWS_ACCESS_KEY_ID=admin
26 | - AWS_SECRET_ACCESS_KEY=password
27 | - AWS_REGION=us-east-1
28 | - CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
29 | - CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory
30 | - CATALOG_WAREHOUSE=s3://icebergdata/demo
31 | - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
32 | - CATALOG_S3_ENDPOINT=http://minio:9000
33 | depends_on:
34 | - minio
35 | networks:
36 | rest_bridge:
37 | expose:
38 | - 8181
39 |
40 | minio:
41 | image: minio/minio:RELEASE.2025-05-24T17-08-30Z
42 | environment:
43 | - MINIO_ROOT_USER=admin
44 | - MINIO_ROOT_PASSWORD=password
45 | - MINIO_DOMAIN=minio
46 | hostname: icebergdata.minio
47 | networks:
48 | rest_bridge:
49 | expose:
50 | - 9001
51 | - 9000
52 | command: ["server", "/data", "--console-address", ":9001"]
53 |
54 | mc:
55 | depends_on:
56 | - minio
57 | image: minio/mc:RELEASE.2025-05-21T01-59-54Z
58 | environment:
59 | - AWS_ACCESS_KEY_ID=admin
60 | - AWS_SECRET_ACCESS_KEY=password
61 | - AWS_REGION=us-east-1
62 | entrypoint: >
63 | /bin/sh -c " until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc rm -r --force minio/icebergdata; /usr/bin/mc mb minio/icebergdata; /usr/bin/mc policy set public minio/icebergdata; tail -f /dev/null "
64 | networks:
65 | rest_bridge:
66 |
--------------------------------------------------------------------------------
/crates/catalog/rest/testdata/update_table_response.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata-location": "s3://warehouse/database/table/metadata.json",
3 | "metadata": {
4 | "format-version": 2,
5 | "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
6 | "location": "s3://warehouse/database/table",
7 | "last-sequence-number" : 1,
8 | "last-updated-ms": 1657810967051,
9 | "last-column-id": 3,
10 | "current-schema-id": 0,
11 | "schemas": [
12 | {
13 | "type": "struct",
14 | "schema-id": 0,
15 | "identifier-field-ids": [2],
16 | "fields": [
17 | {"id": 1, "name": "foo", "required": false, "type": "string"},
18 | {"id": 2, "name": "bar", "required": true, "type": "int"},
19 | {"id": 3, "name": "baz", "required": false, "type": "boolean"}
20 | ]
21 | }
22 | ],
23 | "partition-specs": [],
24 | "default-spec-id": 0,
25 | "last-partition-id": 999,
26 | "default-sort-order-id": 0,
27 | "sort-orders": [{"order-id": 0, "fields": []}],
28 | "properties": {
29 | "write.delete.parquet.compression-codec": "zstd",
30 | "write.metadata.compression-codec": "gzip",
31 | "write.summary.partition-limit": "100",
32 | "write.parquet.compression-codec": "zstd"
33 | },
34 | "current-snapshot-id": -1,
35 | "refs": {},
36 | "snapshots": [],
37 | "snapshot-log": [],
38 | "metadata-log": []
39 | }
40 | }
--------------------------------------------------------------------------------
/crates/catalog/s3tables/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = { workspace = true }
20 | homepage = { workspace = true }
21 | name = "iceberg-catalog-s3tables"
22 | rust-version = { workspace = true }
23 | version = { workspace = true }
24 |
25 | categories = ["database"]
26 | description = "Apache Iceberg Rust S3Tables Catalog"
27 | keywords = ["iceberg", "s3tables", "catalog"]
28 | license = { workspace = true }
29 | repository = { workspace = true }
30 |
31 | [dependencies]
32 | anyhow = { workspace = true }
33 | async-trait = { workspace = true }
34 | aws-config = { workspace = true }
35 | aws-sdk-s3tables = "1.10.0"
36 | iceberg = { workspace = true }
37 | serde_json = { workspace = true }
38 | typed-builder = { workspace = true }
39 | uuid = { workspace = true, features = ["v4"] }
40 |
41 | [dev-dependencies]
42 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
43 | itertools = { workspace = true }
44 | tokio = { workspace = true }
45 |
--------------------------------------------------------------------------------
/crates/catalog/s3tables/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Iceberg s3tables catalog implementation.
19 |
20 | #![deny(missing_docs)]
21 |
22 | mod catalog;
23 | mod utils;
24 |
25 | pub use catalog::*;
26 |
--------------------------------------------------------------------------------
/crates/catalog/sql/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = { workspace = true }
20 | homepage = { workspace = true }
21 | name = "iceberg-catalog-sql"
22 | rust-version = { workspace = true }
23 | version = { workspace = true }
24 |
25 | categories = ["database"]
26 | description = "Apache Iceberg Rust Sql Catalog"
27 | keywords = ["iceberg", "sql", "catalog"]
28 | license = { workspace = true }
29 | repository = { workspace = true }
30 |
31 | [dependencies]
32 | async-trait = { workspace = true }
33 | iceberg = { workspace = true }
34 | serde_json = { workspace = true }
35 | sqlx = { version = "0.8.1", features = ["any"], default-features = false }
36 | typed-builder = { workspace = true }
37 | uuid = { workspace = true, features = ["v4"] }
38 |
39 | [dev-dependencies]
40 | iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
41 | itertools = { workspace = true }
42 | regex = "1.10.5"
43 | sqlx = { version = "0.8.1", features = [
44 | "tls-rustls",
45 | "runtime-tokio",
46 | "any",
47 | "sqlite",
48 | "migrate",
49 | ], default-features = false }
50 | tempfile = { workspace = true }
51 | tokio = { workspace = true }
52 |
--------------------------------------------------------------------------------
/crates/catalog/sql/src/error.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use iceberg::{Error, ErrorKind, NamespaceIdent, Result, TableIdent};
19 |
20 | /// Convert an sqlx error into an iceberg error.
21 | pub fn from_sqlx_error(error: sqlx::Error) -> Error {
22 | Error::new(
23 | ErrorKind::Unexpected,
24 | "operation failed for hitting sqlx error".to_string(),
25 | )
26 | .with_source(error)
27 | }
28 |
29 | pub fn no_such_namespace_err(namespace: &NamespaceIdent) -> Result {
30 | Err(Error::new(
31 | ErrorKind::Unexpected,
32 | format!("No such namespace: {:?}", namespace),
33 | ))
34 | }
35 |
36 | pub fn no_such_table_err(table_ident: &TableIdent) -> Result {
37 | Err(Error::new(
38 | ErrorKind::Unexpected,
39 | format!("No such table: {:?}", table_ident),
40 | ))
41 | }
42 |
43 | pub fn table_already_exists_err(table_ident: &TableIdent) -> Result {
44 | Err(Error::new(
45 | ErrorKind::Unexpected,
46 | format!("Table {:?} already exists.", table_ident),
47 | ))
48 | }
49 |
--------------------------------------------------------------------------------
/crates/catalog/sql/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Iceberg sql catalog implementation.
19 |
20 | #![deny(missing_docs)]
21 |
22 | mod catalog;
23 | mod error;
24 | pub use catalog::*;
25 |
--------------------------------------------------------------------------------
/crates/examples/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | edition = { workspace = true }
20 | homepage = { workspace = true }
21 | license = { workspace = true }
22 | name = "iceberg-examples"
23 | repository = { workspace = true }
24 | rust-version = { workspace = true }
25 | version = { workspace = true }
26 |
27 | [dependencies]
28 | futures = { workspace = true }
29 | iceberg = { workspace = true }
30 | iceberg-catalog-rest = { workspace = true }
31 | tokio = { workspace = true, features = ["full"] }
32 |
33 | [[example]]
34 | name = "rest-catalog-namespace"
35 | path = "src/rest_catalog_namespace.rs"
36 |
37 | [[example]]
38 | name = "rest-catalog-table"
39 | path = "src/rest_catalog_table.rs"
40 |
41 | [[example]]
42 | name = "oss-backend"
43 | path = "src/oss_backend.rs"
44 | required-features = ["storage-oss"]
45 |
46 | [features]
47 | default = []
48 | storage-oss = ["iceberg/storage-oss"]
49 |
--------------------------------------------------------------------------------
/crates/examples/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | Example usage code for `iceberg-rust`. Currently, these examples can't be run directly since they require setting up
21 | environments for catalogs, for example, a REST catalog server.
--------------------------------------------------------------------------------
/crates/iceberg/src/arrow/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Conversion between Iceberg and Arrow schema
19 |
20 | mod schema;
21 | pub use schema::*;
22 |
23 | mod nan_val_cnt_visitor;
24 | pub(crate) use nan_val_cnt_visitor::*;
25 |
26 | pub(crate) mod delete_file_manager;
27 |
28 | mod reader;
29 | pub(crate) mod record_batch_projector;
30 | pub(crate) mod record_batch_transformer;
31 | mod value;
32 | pub use reader::*;
33 | pub use value::*;
34 |
--------------------------------------------------------------------------------
/crates/iceberg/src/avro/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Avro-related code.
19 | mod schema;
20 | pub(crate) use schema::*;
21 |
--------------------------------------------------------------------------------
/crates/iceberg/src/cache.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Cache management for Iceberg.
19 |
20 | use std::sync::Arc;
21 |
22 | use crate::spec::{Manifest, ManifestList};
23 |
24 | /// A trait for caching in-memory objects of given type.
25 | ///
26 | /// # Notes
27 | ///
28 | /// ObjectCache will store deeply nested objects, such as `Manifest`,
29 | /// which contains `Schema`. Please ensure that the cache stores the
30 | /// object in memory as-is, without attempting to serialize it, as
31 | /// serialization could be extremely expensive.
32 | pub trait ObjectCache: Send + Sync {
33 | /// Gets an object from the cache by its key.
34 | fn get(&self, key: &K) -> Option;
35 | /// Sets an object in the cache with the given key and value.
36 | fn set(&self, key: K, value: V);
37 | }
38 |
39 | /// A trait for caching different in-memory objects used by iceberg.
40 | ///
41 | /// # Notes
42 | ///
43 | /// ObjectCache will store deeply nested objects, such as `Manifest`,
44 | /// which contains `Schema`. Please ensure that the cache stores the
45 | /// object in memory as-is, without attempting to serialize it, as
46 | /// serialization could be extremely expensive.
47 | pub trait ObjectCacheProvide: Send + Sync {
48 | /// Gets a cache for manifests.
49 | fn manifest_cache(&self) -> &dyn ObjectCache>;
50 | /// Gets a cache for manifest lists.
51 | fn manifest_list_cache(&self) -> &dyn ObjectCache>;
52 | }
53 |
54 | /// ObjectCacheProvider is a type alias for a thread-safe reference-counted pointer to an ObjectCacheProvide trait object.
55 | pub type ObjectCacheProvider = Arc;
56 |
57 | #[cfg(test)]
58 | mod tests {
59 | use super::*;
60 |
61 | struct _TestDynCompatibleForObjectCache(Arc>>);
62 | struct _TestDynCompatibleForObjectCacheProvider(ObjectCacheProvider);
63 | }
64 |
--------------------------------------------------------------------------------
/crates/iceberg/src/expr/visitors/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | pub(crate) mod bound_predicate_visitor;
19 | pub(crate) mod expression_evaluator;
20 | pub(crate) mod inclusive_metrics_evaluator;
21 | pub(crate) mod inclusive_projection;
22 | pub(crate) mod manifest_evaluator;
23 | pub(crate) mod page_index_evaluator;
24 | pub(crate) mod row_group_metrics_evaluator;
25 | pub(crate) mod strict_metrics_evaluator;
26 | pub(crate) mod strict_projection;
27 |
--------------------------------------------------------------------------------
/crates/iceberg/src/inspect/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Metadata table APIs.
19 |
20 | mod manifests;
21 | mod metadata_table;
22 | mod snapshots;
23 |
24 | pub use manifests::ManifestsTable;
25 | pub use metadata_table::*;
26 | pub use snapshots::SnapshotsTable;
27 |
--------------------------------------------------------------------------------
/crates/iceberg/src/io/storage_fs.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use opendal::Operator;
19 | use opendal::services::FsConfig;
20 |
21 | use crate::Result;
22 |
23 | /// Build a new opendal fs operator rooted at `/`.
24 | pub(crate) fn fs_config_build() -> Result {
25 | let mut cfg = FsConfig::default();
26 | cfg.root = Some("/".to_string());
27 |
28 | Ok(Operator::from_config(cfg)?.finish())
29 | }
30 |
--------------------------------------------------------------------------------
/crates/iceberg/src/io/storage_memory.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use opendal::Operator;
19 | use opendal::services::MemoryConfig;
20 |
21 | use crate::Result;
22 |
23 | pub(crate) fn memory_config_build() -> Result {
24 | Ok(Operator::from_config(MemoryConfig::default())?.finish())
25 | }
26 |
--------------------------------------------------------------------------------
/crates/iceberg/src/io/storage_oss.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::collections::HashMap;
19 |
20 | use opendal::services::OssConfig;
21 | use opendal::{Configurator, Operator};
22 | use url::Url;
23 |
24 | use crate::{Error, ErrorKind, Result};
25 |
26 | /// Required configuration arguments for creating an Aliyun OSS Operator with OpenDAL:
27 | /// - `oss.endpoint`: The OSS service endpoint URL
28 | /// - `oss.access-key-id`: The access key ID for authentication
29 | /// - `oss.access-key-secret`: The access key secret for authentication
30 | /// Aliyun oss endpoint.
31 | pub const OSS_ENDPOINT: &str = "oss.endpoint";
32 | /// Aliyun oss access key id.
33 | pub const OSS_ACCESS_KEY_ID: &str = "oss.access-key-id";
34 | /// Aliyun oss access key secret.
35 | pub const OSS_ACCESS_KEY_SECRET: &str = "oss.access-key-secret";
36 |
37 | /// Parse iceberg props to oss config.
38 | pub(crate) fn oss_config_parse(mut m: HashMap) -> Result {
39 | let mut cfg: OssConfig = OssConfig::default();
40 | if let Some(endpoint) = m.remove(OSS_ENDPOINT) {
41 | cfg.endpoint = Some(endpoint);
42 | };
43 | if let Some(access_key_id) = m.remove(OSS_ACCESS_KEY_ID) {
44 | cfg.access_key_id = Some(access_key_id);
45 | };
46 | if let Some(access_key_secret) = m.remove(OSS_ACCESS_KEY_SECRET) {
47 | cfg.access_key_secret = Some(access_key_secret);
48 | };
49 |
50 | Ok(cfg)
51 | }
52 |
53 | /// Build a new opendal operator from the given path.
54 | pub(crate) fn oss_config_build(cfg: &OssConfig, path: &str) -> Result {
55 | let url = Url::parse(path)?;
56 | let bucket = url.host_str().ok_or_else(|| {
57 | Error::new(
58 | ErrorKind::DataInvalid,
59 | format!("Invalid oss url: {}, missing bucket", path),
60 | )
61 | })?;
62 |
63 | let builder = cfg.clone().into_builder().bucket(bucket);
64 |
65 | Ok(Operator::new(builder)?.finish())
66 | }
67 |
--------------------------------------------------------------------------------
/crates/iceberg/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Apache Iceberg Official Native Rust Implementation
19 | //!
20 | //! # Examples
21 | //!
22 | //! ## Scan A Table
23 | //!
24 | //! ```rust, ignore
25 | //! // This example uses `iceberg_catalog_memory`, which isn't enabled by default.
26 | //! // To run this, add `iceberg-catalog-memory` as a dependency in your Cargo.toml.
27 | //! use futures::TryStreamExt;
28 | //! use iceberg::io::{FileIO, FileIOBuilder};
29 | //! use iceberg::{Catalog, Result, TableIdent};
30 | //! use iceberg_catalog_memory::MemoryCatalog;
31 | //!
32 | //! #[tokio::main]
33 | //! async fn main() -> Result<()> {
34 | //! // Build your file IO.
35 | //! let file_io = FileIOBuilder::new("memory").build()?;
36 | //! // Connect to a catalog.
37 | //! let catalog = MemoryCatalog::new(file_io, None);
38 | //! // Load table from catalog.
39 | //! let table = catalog
40 | //! .load_table(&TableIdent::from_strs(["hello", "world"])?)
41 | //! .await?;
42 | //! // Build table scan.
43 | //! let stream = table
44 | //! .scan()
45 | //! .select(["name", "id"])
46 | //! .build()?
47 | //! .to_arrow()
48 | //! .await?;
49 | //!
50 | //! // Consume this stream like arrow record batch stream.
51 | //! let _data: Vec<_> = stream.try_collect().await?;
52 | //! Ok(())
53 | //! }
54 | //! ```
55 |
56 | #![deny(missing_docs)]
57 |
58 | #[macro_use]
59 | extern crate derive_builder;
60 | extern crate core;
61 |
62 | mod error;
63 | pub use error::{Error, ErrorKind, Result};
64 |
65 | mod catalog;
66 |
67 | pub use catalog::*;
68 |
69 | pub mod table;
70 |
71 | mod avro;
72 | pub mod cache;
73 | pub mod io;
74 | pub mod spec;
75 |
76 | pub mod inspect;
77 | pub mod scan;
78 |
79 | pub mod expr;
80 | pub mod transaction;
81 | pub mod transform;
82 |
83 | mod runtime;
84 |
85 | pub mod arrow;
86 | pub(crate) mod delete_file_index;
87 | mod utils;
88 | pub mod writer;
89 |
90 | mod delete_vector;
91 | pub mod puffin;
92 |
--------------------------------------------------------------------------------
/crates/iceberg/src/puffin/blob.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::collections::HashMap;
19 |
20 | use typed_builder::TypedBuilder;
21 |
22 | /// A serialized form of a "compact" Theta sketch produced by the Apache DataSketches library.
23 | pub const APACHE_DATASKETCHES_THETA_V1: &str = "apache-datasketches-theta-v1";
24 | /// A serialized form of a deletion vector.
25 | pub const DELETION_VECTOR_V1: &str = "deletion-vector-v1";
26 |
27 | /// The blob
28 | #[derive(Debug, PartialEq, Clone, TypedBuilder)]
29 | pub struct Blob {
30 | pub(crate) r#type: String,
31 | pub(crate) fields: Vec,
32 | pub(crate) snapshot_id: i64,
33 | pub(crate) sequence_number: i64,
34 | pub(crate) data: Vec,
35 | pub(crate) properties: HashMap,
36 | }
37 |
38 | impl Blob {
39 | #[inline]
40 | /// See blob types: https://iceberg.apache.org/puffin-spec/#blob-types
41 | pub fn blob_type(&self) -> &str {
42 | &self.r#type
43 | }
44 |
45 | #[inline]
46 | /// List of field IDs the blob was computed for; the order of items is used to compute sketches stored in the blob.
47 | pub fn fields(&self) -> &[i32] {
48 | &self.fields
49 | }
50 |
51 | #[inline]
52 | /// ID of the Iceberg table's snapshot the blob was computed from
53 | pub fn snapshot_id(&self) -> i64 {
54 | self.snapshot_id
55 | }
56 |
57 | #[inline]
58 | /// Sequence number of the Iceberg table's snapshot the blob was computed from
59 | pub fn sequence_number(&self) -> i64 {
60 | self.sequence_number
61 | }
62 |
63 | #[inline]
64 | /// The uncompressed blob data
65 | pub fn data(&self) -> &[u8] {
66 | &self.data
67 | }
68 |
69 | #[inline]
70 | /// Arbitrary meta-information about the blob
71 | pub fn properties(&self) -> &HashMap {
72 | &self.properties
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/crates/iceberg/src/puffin/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Iceberg Puffin implementation.
19 |
20 | #![deny(missing_docs)]
21 |
22 | mod blob;
23 | pub use blob::{APACHE_DATASKETCHES_THETA_V1, Blob, DELETION_VECTOR_V1};
24 |
25 | mod compression;
26 | pub use compression::CompressionCodec;
27 |
28 | mod metadata;
29 | pub use metadata::{BlobMetadata, CREATED_BY_PROPERTY, FileMetadata};
30 |
31 | mod reader;
32 | pub use reader::PuffinReader;
33 |
34 | mod writer;
35 | pub use writer::PuffinWriter;
36 |
37 | #[cfg(test)]
38 | mod test_utils;
39 |
--------------------------------------------------------------------------------
/crates/iceberg/src/spec/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Spec for Iceberg.
19 |
20 | mod datatypes;
21 | mod encrypted_key;
22 | mod manifest;
23 | mod manifest_list;
24 | mod name_mapping;
25 | mod partition;
26 | mod schema;
27 | mod snapshot;
28 | mod snapshot_summary;
29 | mod sort;
30 | mod statistic_file;
31 | mod table_metadata;
32 | mod table_metadata_builder;
33 | mod transform;
34 | mod values;
35 | mod view_metadata;
36 | mod view_metadata_builder;
37 | mod view_version;
38 |
39 | pub use datatypes::*;
40 | pub use encrypted_key::*;
41 | pub use manifest::*;
42 | pub use manifest_list::*;
43 | pub use name_mapping::*;
44 | pub use partition::*;
45 | pub use schema::*;
46 | pub use snapshot::*;
47 | pub use snapshot_summary::*;
48 | pub use sort::*;
49 | pub use statistic_file::*;
50 | pub use table_metadata::*;
51 | pub use transform::*;
52 | pub use values::*;
53 | pub use view_metadata::*;
54 | pub use view_version::*;
55 |
--------------------------------------------------------------------------------
/crates/iceberg/src/spec/schema/utils.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::collections::HashMap;
19 |
20 | use crate::{Error, ErrorKind, Result};
21 |
22 | pub fn try_insert_field(map: &mut HashMap, field_id: i32, value: V) -> Result<()> {
23 | map.insert(field_id, value).map_or_else(
24 | || Ok(()),
25 | |_| {
26 | Err(Error::new(
27 | ErrorKind::DataInvalid,
28 | format!(
29 | "Found duplicate 'field.id' {}. Field ids must be unique.",
30 | field_id
31 | ),
32 | ))
33 | },
34 | )
35 | }
36 |
--------------------------------------------------------------------------------
/crates/iceberg/src/utils.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::num::NonZeroUsize;
19 |
20 | // Fallback used when the parallelism query fails; 1 is the safest choice.
21 | // The limitations of the underlying query are described at
22 | // https://doc.rust-lang.org/std/thread/fn.available_parallelism.html#limitations
23 | const DEFAULT_PARALLELISM: usize = 1;
24 |
25 | /// Returns an estimate of the default amount of parallelism to use.
26 | ///
27 | /// The estimate comes from [`std::thread::available_parallelism`]. That call
28 | /// returns a `Result` because it can fail; on failure this function falls back
29 | /// to `DEFAULT_PARALLELISM` rather than surfacing the error.
30 | ///
31 | /// The result is deliberately not cached in a OnceCell or LazyCell, because the
32 | /// available parallelism can change while a process is running. Avoid calling
33 | /// this in a hot loop.
34 | pub(crate) fn available_parallelism() -> NonZeroUsize {
35 |     match std::thread::available_parallelism() {
36 |         Ok(parallelism) => parallelism,
37 |         Err(_) => {
38 |             // TODO: log/trace when this fallback occurs.
39 |             NonZeroUsize::new(DEFAULT_PARALLELISM).unwrap()
40 |         }
41 |     }
42 | }
43 |
--------------------------------------------------------------------------------
/crates/iceberg/src/writer/base_writer/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Base writer module contains the basic writers provided by iceberg: `DataFileWriter`, `PositionDeleteFileWriter`, `EqualityDeleteFileWriter`.
19 |
20 | pub mod data_file_writer;
21 | pub mod equality_delete_writer;
22 |
--------------------------------------------------------------------------------
/crates/iceberg/src/writer/file_writer/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! This module contains the writers for the data file formats supported by iceberg: parquet, orc.
19 |
20 | use arrow_array::RecordBatch;
21 | use futures::Future;
22 |
23 | use super::CurrentFileStatus;
24 | use crate::Result;
25 | use crate::spec::DataFileBuilder;
26 |
27 | mod parquet_writer;
28 | pub use parquet_writer::{ParquetWriter, ParquetWriterBuilder};
29 | mod track_writer;
30 |
31 | pub mod location_generator;
32 |
33 | type DefaultOutput = Vec<DataFileBuilder>; // NOTE(review): element type restored from the `DataFileBuilder` import above; confirm.
34 |
35 | /// File writer builder trait.
36 | pub trait FileWriterBuilder: Send + Clone + 'static {
37 | /// The associated file writer type.
38 | type R: FileWriter;
39 | /// Build file writer.
40 | fn build(self) -> impl Future