├── .gitattributes ├── examples ├── core │ └── output │ │ └── .gitignore ├── netcdf_engine │ ├── output │ │ └── .gitignore │ └── netcdf-to-tiledb-set-max-fragment-size.ipynb ├── xarray_engine │ ├── output │ │ └── .gitignore │ └── tiledb-xarray-partially-filled-arrays.ipynb └── README.md ├── setup.py ├── tiledb └── cf │ ├── __main__.py │ ├── xarray_engine │ ├── _common.py │ ├── __init__.py │ ├── engine.py │ ├── _encoding.py │ └── _array_wrapper.py │ ├── netcdf_engine │ ├── __init__.py │ ├── api.py │ └── _utils.py │ ├── testing.py │ ├── core │ ├── __init__.py │ ├── registry.py │ ├── source.py │ ├── _shared_dim.py │ ├── api.py │ ├── _dim_creator.py │ ├── _attr_creator.py │ └── _metadata.py │ ├── __init__.py │ ├── _utils.py │ └── cli.py ├── requirements_dev.txt ├── quarto-materials ├── tiledb-logo.png ├── Background-tdb-header.jpg ├── tiledb.css ├── tiledb-logo.svg └── tiledb.scss ├── .editorconfig ├── documentation ├── core.md ├── code-of-conduct.md ├── index.md ├── netcdf-engine.md ├── xarray-engine.md ├── contributing.md └── tiledb-cf-spec.md ├── .gitignore ├── pyproject.toml ├── tests ├── core │ ├── test_shared_dimension.py │ ├── test_dim_metadata.py │ ├── test_attr_metadata.py │ ├── test_array_metadata.py │ ├── test_group.py │ ├── test_fragment_writer.py │ └── test_write_array.py ├── netcdf_engine │ ├── test_netcdf_coord_to_dim_converter.py │ ├── test_convert_timestamp.py │ ├── test_open_netcdf_group.py │ ├── test_netcdf4_to_dim_converter.py │ ├── test_cli_netcdf_convert.py │ ├── test_utils.py │ ├── conftest.py │ ├── test_netcdf4_converter_array.py │ └── test_convert_multifragments.py └── xarray_engine │ ├── test_plugin_distributed.py │ ├── test_plugin_timestamp.py │ └── conftest.py ├── tools ├── lint.sh └── hooks │ └── pre-commit.sh ├── .github └── workflows │ ├── release.yml │ ├── quarto-render.yml │ └── ci.yml ├── LICENSE ├── README.md ├── setup.cfg └── _quarto.yml /.gitattributes: -------------------------------------------------------------------------------- 
1 | *.qmd linguist-language=RMarkdown 2 | -------------------------------------------------------------------------------- /examples/core/output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup() 4 | -------------------------------------------------------------------------------- /examples/netcdf_engine/output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /examples/xarray_engine/output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /tiledb/cf/__main__.py: -------------------------------------------------------------------------------- 1 | from tiledb.cf import cli 2 | 3 | cli() 4 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | isort 3 | flake8 4 | mypy 5 | flake8-bugbear 6 | pytest 7 | pytidylib 8 | -------------------------------------------------------------------------------- /quarto-materials/tiledb-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TileDB-Inc/TileDB-CF-Py/HEAD/quarto-materials/tiledb-logo.png -------------------------------------------------------------------------------- /quarto-materials/Background-tdb-header.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TileDB-Inc/TileDB-CF-Py/HEAD/quarto-materials/Background-tdb-header.jpg -------------------------------------------------------------------------------- /tiledb/cf/xarray_engine/_common.py: -------------------------------------------------------------------------------- 1 | # Group level metadata 2 | _ARRAY_FIXED_DIMS_PREFIX = "__tiledb_array_fixed_dimensions." 3 | _ATTR_PREFIX = "__tiledb_attr." 4 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | # Unix-style newlines with a newline ending every file 4 | [*] 5 | end_of_line = lf 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | indent_style = space 9 | indent_size = 4 10 | -------------------------------------------------------------------------------- /documentation/core.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: TileDB-CF Core 3 | --- 4 | 5 | :::{.callout-warning} 6 | The TileDB-CF library is still under initial development and changes may not be backward compatible. 7 | ::: 8 | 9 | Working with large multi-array datasets with complex metadata can be unwieldy. The core TileDB-CF library provides additional support for common operations. 
10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | 4 | # Distribution / packaging 5 | build/ 6 | dist/ 7 | *.egg-info/ 8 | 9 | # Unit test / coverage reports 10 | .tox/ 11 | .pytest_cache/ 12 | .mypy_cache/ 13 | .coverage* 14 | 15 | # Quarto documentation 16 | /.quarto/ 17 | docs/.quarto 18 | /docs 19 | documentation/api 20 | objects.json 21 | 22 | # pyenv 23 | .python-version 24 | -------------------------------------------------------------------------------- /tiledb/cf/xarray_engine/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import xarray 3 | 4 | has_xarray = True 5 | 6 | except ImportError: 7 | has_xarray = False 8 | 9 | 10 | from .api import ( 11 | copy_data_from_xarray, 12 | copy_metadata_from_xarray, 13 | create_group_from_xarray, 14 | from_xarray, 15 | ) 16 | 17 | __all__ = [ 18 | "has_xarray", 19 | "copy_data_from_xarray", 20 | "copy_metadata_from_xarray", 21 | "create_group_from_xarray", 22 | "from_xarray", 23 | ] # type: ignore 24 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "build"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.black] 6 | line-length = 88 7 | target-version = ['py38'] 8 | exclude = ''' 9 | /( 10 | | \.git 11 | | \.mypy_cache 12 | | \.pytest_cache 13 | | docs 14 | | dist 15 | )/ 16 | ''' 17 | 18 | [tool.isort] 19 | profile = "black" 20 | multi_line_output = 3 21 | 22 | [tool.pylint.messages_control] 23 | disable = "C0330, C0326" 24 | 25 | [tool.pylint.format] 26 | max-line-length = "88" 27 | 28 | [tool.pytest.ini_options] 29 | markers = [ 30 | "flaky: flaky tests", 31 | "network: 
"""NetCDF-to-TileDB conversion engine.

Exposes ``from_netcdf`` unconditionally and, when the optional ``netCDF4``
dependency is installed, the full set of NetCDF4 converter classes.
"""

try:
    import netCDF4

    has_netCDF4 = True

except ImportError:
    # netCDF4 is an optional dependency; the engine degrades gracefully.
    has_netCDF4 = False

from .api import from_netcdf

__all__ = ["has_netCDF4", "from_netcdf"]  # type: ignore

if has_netCDF4:
    from ._array_converters import NetCDF4ArrayConverter, NetCDF4DomainConverter
    from ._attr_converters import NetCDF4VarToAttrConverter
    from ._dim_converters import (
        NetCDF4CoordToDimConverter,
        NetCDF4DimToDimConverter,
        NetCDF4ScalarToDimConverter,
        NetCDF4ToDimConverter,
    )
    from ._utils import open_netcdf_group
    from .converter import NetCDF4ConverterEngine

    # Export every conditionally imported public name, not just the converter
    # engine, so `import *` and introspection match the module's contents.
    __all__.extend(
        [
            "NetCDF4ArrayConverter",
            "NetCDF4DomainConverter",
            "NetCDF4VarToAttrConverter",
            "NetCDF4CoordToDimConverter",
            "NetCDF4DimToDimConverter",
            "NetCDF4ScalarToDimConverter",
            "NetCDF4ToDimConverter",
            "open_netcdf_group",
            "NetCDF4ConverterEngine",
        ]
    )
"""Core TileDB-CF functionality."""

from ._array_creator import ArrayCreator, DomainCreator
from ._attr_creator import AttrCreator
from ._dataspace_creator import DataspaceCreator
from ._dim_creator import DimCreator
from ._metadata import (
    ATTR_METADATA_FLAG,
    DIM_METADATA_FLAG,
    ArrayMetadata,
    AttrMetadata,
    DimMetadata,
)
from ._shared_dim import SharedDim
from .api import create_group, open_group_array
from .source import NumpyData

# `__all__` must contain *strings* naming the public objects. Listing the
# objects themselves (as before) makes `from tiledb.cf.core import *` raise
# "TypeError: Item in __all__ must be str", and confuses doc/introspection
# tools. The sibling netcdf_engine/xarray_engine packages already use strings.
__all__ = [
    "ATTR_METADATA_FLAG",
    "DIM_METADATA_FLAG",
    "ArrayCreator",
    "AttrCreator",
    "ArrayMetadata",
    "AttrMetadata",
    "DataspaceCreator",
    "DimCreator",
    "DimMetadata",
    "DomainCreator",
    "NumpyData",
    "SharedDim",
    "create_group",
    "open_group_array",
]
"""``tiledb.cf`` is the core module for the TileDB-CF-Py library.

This module contains core classes and functions for supporting the NetCDF data model in
the `TileDB storage engine <https://github.com/TileDB-Inc/TileDB>`_. To use this module
simply import using:

.. code-block:: python

    import tiledb.cf
"""

from .cli import cli
from .core import (
    ATTR_METADATA_FLAG,
    DIM_METADATA_FLAG,
    ArrayMetadata,
    AttrMetadata,
    DataspaceCreator,
    DimMetadata,
    create_group,
    open_group_array,
)
from .netcdf_engine import from_netcdf, has_netCDF4
from .xarray_engine import (
    copy_data_from_xarray,
    copy_metadata_from_xarray,
    create_group_from_xarray,
    from_xarray,
    has_xarray,
)

# NetCDF4ConverterEngine is only importable when the optional netCDF4
# dependency is installed (see tiledb/cf/netcdf_engine/__init__.py).
if has_netCDF4:
    from .netcdf_engine import NetCDF4ConverterEngine
## NetCDF Engine

The [netcdf_engine](./netcdf_engine) subdirectory includes examples for converting NetCDF to TileDB.

* [NetCDF to TileDB conversion basics](./netcdf_engine/netcdf-to-tiledb-basics.ipynb)
* [Setting tile size for TileDB arrays](./netcdf_engine/netcdf-to-tiledb-set-tiles.ipynb)
* [Setting the max fragment size or max chunk size for copying data from NetCDF to TileDB](./netcdf_engine/netcdf-to-tiledb-set-max-fragment-size.ipynb)

## Xarray Backend

The [xarray_engine](./xarray_engine) subdirectory includes examples for using the TileDB backend for xarray.
import numpy as np
import pytest

from tiledb.cf.netcdf_engine import NetCDF4CoordToDimConverter

# Skip this whole module when the optional netCDF4 dependency is missing.
netCDF4 = pytest.importorskip("netCDF4")


def test_coord_converter_simple():
    """A 1-D NetCDF coordinate variable converts with name and dtype preserved."""
    # diskless=True keeps the dataset entirely in memory; nothing is written.
    with netCDF4.Dataset("example.nc", mode="w", diskless=True) as dataset:
        dataset.createDimension("x", 4)
        x = dataset.createVariable("x", datatype=np.float64, dimensions=("x",))
        converter = NetCDF4CoordToDimConverter.from_netcdf(x)
        assert converter.name == "x"
        assert converter.dtype == np.dtype("float64")
        # No domain is inferred for the coordinate at conversion time.
        assert converter.domain is None


def test_bad_size_error():
    """A variable with more than one dimension cannot be used as a coordinate dim."""
    with netCDF4.Dataset("example.nc", mode="w", diskless=True) as group:
        group.createDimension("x", 16)
        group.createDimension("y", 16)
        x = group.createVariable(
            "x", datatype=np.dtype("float64"), dimensions=("x", "y")
        )
        # A coordinate must be 1-D with matching name; 2-D input is rejected.
        with pytest.raises(ValueError):
            NetCDF4CoordToDimConverter.from_netcdf(x)
pypa/gh-action-pypi-publish@release/v1 24 | with: 25 | user: __token__ 26 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 27 | repository_url: https://test.pypi.org/legacy/ 28 | - name: Publish package to PyPI 29 | if: github.event_name == 'release' && github.event.action == 'published' && github.repository == 'TileDB-Inc/TileDB-CF-Py' 30 | uses: pypa/gh-action-pypi-publish@release/v1 31 | with: 32 | user: __token__ 33 | password: ${{ secrets.PYPI_API_TOKEN }} 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 TileDB, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/netcdf_engine/test_convert_timestamp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | import tiledb 5 | from tiledb.cf import NetCDF4ConverterEngine 6 | 7 | netCDF4 = pytest.importorskip("netCDF4") 8 | 9 | 10 | class TestCopyAtTimestamp: 11 | """Test copying a simple NetCDF file at a specified timestamp. 12 | 13 | NetCDF File: 14 | 15 | dimensions: 16 | x (8) 17 | 18 | variables: 19 | f (x) = np.linspace(0, 1, 8) 20 | """ 21 | 22 | attr_data = np.linspace(0, 1, 8) 23 | 24 | def test_copy_to_timestamp(self, tmpdir): 25 | uri = str(tmpdir.mkdir("output").join("timestamp_array")) 26 | timestamp = 1 27 | with netCDF4.Dataset("tmp", mode="w", diskless=True) as dataset: 28 | dataset.setncatts({"title": "test timestamp"}) 29 | dataset.createDimension("x", 8) 30 | var = dataset.createVariable("f", np.float64, ("x",)) 31 | var[:] = self.attr_data 32 | converter = NetCDF4ConverterEngine.from_group(dataset) 33 | converter.convert_to_array( 34 | uri, input_netcdf_group=dataset, timestamp=timestamp 35 | ) 36 | with tiledb.open(uri, timestamp=(1, 1)) as array: 37 | assert array.meta["title"] == "test timestamp" 38 | result_data = array[:]["f"] 39 | np.testing.assert_equal(self.attr_data, result_data) 40 | -------------------------------------------------------------------------------- /tests/netcdf_engine/test_open_netcdf_group.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from tiledb.cf.netcdf_engine import open_netcdf_group 4 | 5 | netCDF4 = pytest.importorskip("netCDF4") 6 | 7 | 8 | def test_open_netcdf_group_with_group(tmpdir): 9 | with netCDF4.Dataset("example.nc", mode="w", diskless=True) as dataset: 10 | with open_netcdf_group(dataset) as group: 11 | assert isinstance(group, netCDF4.Dataset) 12 | assert group == dataset 13 | 14 | 15 
def test_open_netcdf_group_with_file(tmpdir):
    """Opening by file path + group path yields the nested netCDF4 Group."""
    filepath = str(tmpdir.mkdir("open_group").join("simple_dataset.nc"))
    with netCDF4.Dataset(filepath, mode="w") as dataset:
        group1 = dataset.createGroup("group1")
        group1.createGroup("group2")
    # File is closed above before being reopened by path -- TODO confirm
    # this matches the original indentation (reopen-after-close pattern).
    with open_netcdf_group(input_file=filepath, group_path="/group1/group2") as group:
        assert isinstance(group, netCDF4.Group)
        assert group.path == "/group1/group2"


def test_open_netcdf_group_bad_type_error():
    """A positional argument that is not a Dataset/Group raises TypeError."""
    with pytest.raises(TypeError):
        with open_netcdf_group("input_file"):
            pass


def test_open_netcdf_group_no_file_error():
    """Calling with neither a group nor an input file raises ValueError."""
    with pytest.raises(ValueError):
        with open_netcdf_group():
            pass


def test_open_netcdf_group_no_group_error():
    """An input file without a group path raises ValueError.

    NOTE(review): presumably group_path is required alongside input_file;
    confirm against open_netcdf_group's implementation.
    """
    with pytest.raises(ValueError):
        with open_netcdf_group(input_file="test.nc"):
            pass
NetCDF4ToDimConverter(self.shared_dim) 30 | dim_converter.max_fragment_length = 1 31 | assert dim_converter.max_fragment_length == 1 32 | 33 | def test_bad_max_fragment_length_error(self): 34 | """Tests error when setting an invalid max_fragment_length.""" 35 | dim_converter = NetCDF4ToDimConverter(self.shared_dim) 36 | with pytest.raises(ValueError): 37 | dim_converter.max_fragment_length = 0 38 | -------------------------------------------------------------------------------- /.github/workflows/quarto-render.yml: -------------------------------------------------------------------------------- 1 | # Cloned from https://github.com/TileDB-Inc/tiledb-quarto-template 2 | 3 | name: Render and deploy Quarto files 4 | on: 5 | push: 6 | pull_request: 7 | 8 | jobs: 9 | quarto-render-and-deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: "Install Quarto" 15 | uses: quarto-dev/quarto-actions/setup@v2 16 | with: 17 | version: 0.9.141 18 | 19 | - name: "Setup Python" 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: "3.11" 23 | 24 | - name: "Upgrade pip" 25 | run: python -m pip install --upgrade pip 26 | 27 | - name: "Install Python Dependencies" 28 | run: python -m pip install ".[docs]" 29 | 30 | - name: "Quarto render" 31 | shell: bash 32 | run: | 33 | quartodoc build 34 | quarto render --fail-if-warnings 35 | # https://github.com/quarto-dev/quarto-cli/issues/493 36 | 37 | - name: "Deploy to gh-pages" 38 | uses: peaceiris/actions-gh-pages@v3 39 | # Change to the name of your repo's primary branch name: 40 | if: github.ref == 'refs/heads/dev' 41 | with: 42 | # This is GitHub Actions magic; no secrets for us to manage; and this works first-time 43 | # without any extra configs other than visiting Settings -> Pages in your GitHub repo. 
44 | github_token: ${{ secrets.GITHUB_TOKEN }} 45 | publish_dir: docs 46 | destination_dir: docs 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | TileDB logo 2 | 3 | # TileDB-CF-Py 4 | 5 | The TileDB-CF-Py library is a Python library for supporting the NetCDF data model in the [TileDB storage engine](https://github.com/TileDB-Inc/TileDB). TileDB-CF-Py provides readers and writers for viewing and manipulating TileDB arrays and groups using TileDB CF Dataspaces - a special TileDB group that follows the requirements in [docs/source/tiledb-cf-spec.md](documentation/tiledb-cf-spec.md). 6 | 7 | :warning: This library is still under initial development and changes may not be backward compatible. 8 | 9 | ## TileDB Quick Links 10 | 11 | * [Homepage](https://tiledb.com) 12 | * [Documentation](https://docs.tiledb.com/main/) 13 | * [Forum](https://forum.tiledb.io/) 14 | * [Organization](https://github.com/TileDB-Inc/) 15 | 16 | ## Getting Started 17 | 18 | ### Quick Installation 19 | 20 | This project is available from [PyPI](https://pypi.org/project/tiledb-cf/) and may be installed with ``pip``: 21 | 22 | ```bash 23 | pip install tiledb-cf 24 | ``` 25 | 26 | ### Documentation 27 | 28 | Documentation is available at: [https://tiledb-inc.github.io/TileDB-CF-Py](https://tiledb-inc.github.io/TileDB-CF-Py/) 29 | 30 | ### Example Notebooks 31 | 32 | Example Jupyter notebooks are available in the [examples](./examples) folder. 33 | 34 | 35 | ## Development 36 | 37 | For information on contributing to this project see the [contributing](documentation/contributing.md) document and the [code of conduct](documentation/code-of-conduct.md). 
38 | -------------------------------------------------------------------------------- /quarto-materials/tiledb.css: -------------------------------------------------------------------------------- 1 | /* 2 | Cloned from https://github.com/TileDB-Inc/tiledb-quarto-template 3 | 4 | tiledb light blue #4d9fff 5 | tiledb dark blue #0a2580 6 | */ 7 | 8 | .navbar-nav:hover .nav-link:hover { 9 | color: #4d9fff; 10 | } 11 | 12 | .nav-page:hover .nav-page-previous:hover { 13 | color: #4d9fff; 14 | } 15 | 16 | .nav-page:hover .nav-page-next:hover { 17 | color: #4d9fff; 18 | } 19 | 20 | .nav-page:hover .nav-page-text:hover { 21 | color: #4d9fff; 22 | } 23 | 24 | .toc-actions a:hover { 25 | color: #4d9fff; 26 | } 27 | 28 | .page-navigation:hover { 29 | color: #4d9fff; 30 | } 31 | 32 | a.pagination-link:hover { 33 | color: #4d9fff; 34 | } 35 | 36 | .sidebar-navigation .text-start { 37 | font-weight: bold; 38 | } 39 | 40 | .sidebar.sidebar-navigation .active { 41 | /* 42 | color: #800000; 43 | background-color: #e0e0e0; 44 | */ 45 | } 46 | 47 | .sidebar.sidebar-navigation .active, 48 | .sidebar.sidebar-navigation .show > .nav-link { 49 | /*color: #0a2580;*/ 50 | color: #2c4396; 51 | background-color: #e0e0e0; 52 | padding-left: 4px; 53 | padding-right: 4px; 54 | } 55 | 56 | a { 57 | color: #2c4396; 58 | } 59 | a:before, 60 | a:focus, 61 | a:hover, 62 | a:link, 63 | a:visited { 64 | color: #4629c9; 65 | } 66 | 67 | code, 68 | p code:not(.sourceCode), 69 | li code:not(.sourceCode), 70 | kbd, 71 | pre { 72 | color: #000000; 73 | background-color: #f0f0f0; 74 | font-size: 12px; 75 | direction: ltr; 76 | border-radius: 3px; 77 | } 78 | 79 | pre { 80 | font-size: 12px; 81 | padding: 10px; 82 | text-decoration: none; 83 | 84 | white-space: pre-wrap; /* css-3 */ 85 | white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ 86 | white-space: -pre-wrap; /* Opera 4-6 */ 87 | white-space: -o-pre-wrap; /* Opera 7 */ 88 | } 89 | 
-------------------------------------------------------------------------------- /tiledb/cf/core/registry.py: -------------------------------------------------------------------------------- 1 | """Create a name registry for use in modifying grouped objects.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Optional, TypeVar 6 | 7 | from typing_extensions import Protocol, Self 8 | 9 | T = TypeVar("T") 10 | 11 | 12 | class Registry(Protocol[T]): 13 | def __delitem__(self, name: str): 14 | """Delete the element with the provided name.""" 15 | 16 | def __getitem__(self, name: str) -> T: 17 | """Get the element with the provided name.""" 18 | 19 | def __setitem__(self, name: str, value: T): 20 | """Set the elemetn with the provided name to the provided value.""" 21 | 22 | def rename(self, old_name: str, new_name: str): 23 | """Rename an element of the registry. 24 | 25 | If the rename fails, the registry should be left unchanged. 26 | """ 27 | 28 | 29 | class RegisteredByNameMixin: 30 | def __init__(self, name: str, registry: Optional[Registry[Self]]): 31 | self._name = name 32 | self._registry: Optional[Registry[Self]] = None 33 | self.set_registry(registry) 34 | 35 | @property 36 | def is_registered(self) -> bool: 37 | return self._registry is not None 38 | 39 | @property 40 | def name(self) -> str: 41 | return self._name 42 | 43 | @name.setter 44 | def name(self, name: str): 45 | if self._registry is not None: 46 | self._registry.rename(self.name, name) 47 | self._name = name 48 | 49 | def set_registry(self, registry: Optional[Registry[Self]]): 50 | if self._registry is not None: 51 | raise ValueError("Registry is already set.") 52 | if registry is not None: 53 | registry[self.name] = self 54 | self._registry = registry 55 | -------------------------------------------------------------------------------- /tests/netcdf_engine/test_cli_netcdf_convert.py: -------------------------------------------------------------------------------- 1 | import 
import numpy as np
from click.testing import CliRunner

import tiledb
import tiledb.cf


def _convert_and_check(tmpdir, input_filepath, mode_flag, array_name):
    """Run the ``netcdf-convert`` CLI and verify the resulting array.

    Invokes the command on ``input_filepath`` with the given mode flag, then
    checks that the named output array has a single ``x1`` attribute over a
    ``row`` dimension holding ``np.linspace(1.0, 4.0, 8)``.
    """
    uri = str(tmpdir.mkdir("output").join("simple1"))
    runner = CliRunner()
    result = runner.invoke(
        tiledb.cf.cli,
        ["netcdf-convert", "-i", input_filepath, "-o", uri, mode_flag],
    )
    assert result.exit_code == 0
    array_uri = f"{uri}/{array_name}"
    array_schema = tiledb.ArraySchema.load(array_uri)
    assert [attr.name for attr in array_schema] == ["x1"]
    assert [dim.name for dim in array_schema.domain] == ["row"]
    with tiledb.open(array_uri, attr="x1") as array:
        np.testing.assert_equal(array[:], np.linspace(1.0, 4.0, 8))


def test_netcdf_convert_collect(tmpdir, simple1_netcdf_file):
    """--collect-attrs stores all attributes in a single 'array0'."""
    _convert_and_check(
        tmpdir, simple1_netcdf_file.filepath, "--collect-attrs", "array0"
    )


def test_netcdf_convert_separate(tmpdir, simple1_netcdf_file):
    """--array-per-attr stores each attribute in its own array ('x1')."""
    _convert_and_check(
        tmpdir, simple1_netcdf_file.filepath, "--array-per-attr", "x1"
    )
python-version: '3.11' 13 | - name: Display Python version 14 | run: python -c "import sys; print(sys.version)" 15 | - name: Upgrade pip 16 | run: python -m pip install --upgrade pip 17 | - name: Checkout 18 | uses: actions/checkout@v2 19 | - name: Install dependencies 20 | run: python -m pip install -r requirements_dev.txt 21 | - name: Run black 22 | run: black --check . 23 | - name: Run isort 24 | run: isort --check . 25 | - name: Run flake8 26 | run: flake8 --statistics . 27 | 28 | tests: 29 | runs-on: ${{ matrix.os }} 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | os: [ubuntu-latest, macos-latest, windows-latest] 34 | python-version: ['3.8', '3.9', '3.10', '3.11'] 35 | steps: 36 | - name: Setup Python 37 | uses: actions/setup-python@v2 38 | with: 39 | python-version: ${{ matrix.python-version }} 40 | - name: Display Python version 41 | run: python -c "import sys; print(sys.version)" 42 | - name: Upgrade pip 43 | run: python -m pip install --upgrade pip 44 | - name: Install Pytest 45 | run: python -m pip install pytest pytest-cov 46 | - name: Checkout 47 | uses: actions/checkout@v2 48 | - name: Install TileDB-CF-Py 49 | run: python -m pip install ".[netCDF4,xarray,parallel]" 50 | - name: Test with coverage 51 | run: | 52 | python -m pytest --cov-report term-missing --cov-report=html:coverage --cov-report=xml:coverage/coverage.xml --cov="tiledb/cf" 53 | - name: Archive code coverage results 54 | uses: actions/upload-artifact@v2 55 | with: 56 | name: code-coverage-report 57 | path: coverage 58 | -------------------------------------------------------------------------------- /tests/xarray_engine/test_plugin_distributed.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytest 4 | 5 | xr = pytest.importorskip("xarray") # isort:skip 6 | dask = pytest.importorskip("dask") # isort:skip 7 | distributed = pytest.importorskip("distributed") # isort:skip 8 | 9 | from dask.distributed import Client 10 
from distributed.utils_test import cleanup, cluster, loop, loop_in_thread  # noqa: F401
from xarray.tests import assert_allclose

# Skip the whole module on interpreters too old for current xarray.
pytestmark = pytest.mark.skipif(
    sys.version_info < (3, 9), reason="xarray requires python3.9 or higher"
)

da = pytest.importorskip("dask.array")
loop = loop  # loop is an imported fixture, which flake8 has issues ack-ing


def test_dask_distributed_tiledb_integration_test(loop, create_tiledb_example):
    """Open a TileDB-backed dataset with dask chunks on a distributed cluster
    and check the computed result matches the expected dataset."""
    array_uri, expected = create_tiledb_example
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop):
            ds = xr.open_dataset(array_uri, chunks={"time": 1}, engine="tiledb")
            # A chunked open must produce lazy dask arrays, not eager numpy.
            assert isinstance(ds["pressure"].data, da.Array)
            actual = ds.compute()
            assert_allclose(actual, expected)


# Fix: the original implicitly-concatenated reason string was missing a space
# and rendered as "...backend engineafter xarray...".
@pytest.mark.skip(
    reason="failing test of deprecated engine. Will implement for new backend engine "
    "after xarray implements better non-nanosecond datetime support."
)
def test_dask_distributed_tiledb_datetime_integration_test(
    loop,
    create_tiledb_datetime_example,
):
    """Same distributed round-trip as above, but for the deprecated engine's
    datetime handling (currently skipped, see the skip reason)."""
    array_uri, expected = create_tiledb_datetime_example
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop):
            # Opening via the deprecated engine is expected to warn.
            with pytest.deprecated_call():
                ds = xr.open_dataset(
                    array_uri,
                    chunks={"date": 1},
                    use_deprecated_engine=True,
                    engine="tiledb",
                )
            assert isinstance(ds["temperature"].data, da.Array)
            actual = ds.compute()
            assert_allclose(actual, expected)
data.""" 11 | 12 | @property 13 | def metadata(self) -> Mapping[str, Any]: 14 | """A mapping of metadata string-to-value pairs.""" 15 | 16 | @property 17 | def shape(self) -> Optional[Tuple[int, ...]]: 18 | """Shape of the data, or `None` if no shape.""" 19 | 20 | @shape.setter 21 | def shape(self, new_shape: Tuple[int, ...]): 22 | """Set the shape to `new_shape`.""" 23 | 24 | @property 25 | def size(self) -> int: 26 | """Size of the data.""" 27 | 28 | @property 29 | def values(self) -> np.array: 30 | """Data values.""" 31 | 32 | 33 | class NumpyData: 34 | def __init__( 35 | self, input: np.array, *, metadata: Optional[Mapping[str, Any]] = None 36 | ): 37 | self._source_data = input 38 | self._metadata = dict() if metadata is None else dict(metadata) 39 | 40 | @property 41 | def dtype(self): 42 | return self._source_data.dtype 43 | 44 | @property 45 | def metadata(self): 46 | return self._metadata 47 | 48 | @property 49 | def shape(self): 50 | return self._source_data.shape 51 | 52 | @shape.setter 53 | def shape(self, new_shape): 54 | self._source_data = np.reshape(self._source_data, new_shape) 55 | 56 | @property 57 | def size(self): 58 | return self._source_data.size 59 | 60 | @property 61 | def values(self): 62 | return self._source_data 63 | 64 | 65 | def create_field_data( 66 | source: Union[np.ndarray, int, FieldData], dtype: np.dtype 67 | ) -> FieldData: 68 | if isinstance(source, np.ndarray): 69 | field_data = NumpyData(source.astype(dtype)) 70 | elif isinstance(source, int): 71 | field_data = NumpyData(np.ndarray(source, dtype=dtype)) 72 | else: 73 | field_data = source 74 | return field_data 75 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = tiledb-cf 3 | version = 0.9.1 4 | description = TileDB Python library for supporting Climate and Forecast datasets. 5 | author = TileDB, Inc. 
6 | author_email = help@tiledb.io 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | license = MIT 10 | keywords = tiledb, climate, forecast, netcdf 11 | url = https://github.com/TileDB-Inc/TileDB-CF-Py 12 | project_urls = 13 | Documentation = https://docs.tiledb.com 14 | classifiers = 15 | Development Status :: 3 - Alpha 16 | Intended Audience :: Developers 17 | Intended Audience :: Information Technology 18 | Intended Audience :: Science/Research 19 | License :: OSI Approved :: MIT License 20 | Operating System :: OS Independent 21 | Programming Language :: Python :: 3 22 | Programming Language :: Python :: 3.7 23 | Programming Language :: Python :: 3.8 24 | Programming Language :: Python :: 3.9 25 | Programming Language :: Python :: Implementation :: PyPy 26 | Topic :: Software Development 27 | 28 | [options] 29 | zip_safe = False 30 | packages = 31 | tiledb.cf 32 | tiledb.cf.core 33 | tiledb.cf.netcdf_engine 34 | tiledb.cf.xarray_engine 35 | python_requires = >=3.7 36 | install_requires = 37 | numpy >= 1.16.5 38 | setuptools >= 40.4 39 | tiledb >= 0.21.2 40 | click >= 0.7.0 41 | typing-extensions >= 4.0.0 42 | 43 | [options.extras_require] 44 | netCDF4 = netCDF4 45 | xarray = xarray >= 0.18.0 46 | parallel = dask[complete] 47 | complete = 48 | %(netCDF4)s 49 | %(xarray)s 50 | %(parallel)s 51 | docs = 52 | quartodoc 53 | matplotlib 54 | jupyter 55 | %(complete)s 56 | 57 | [options.entry_points] 58 | console_scripts = 59 | tiledb-cf = tiledb.cf:cli 60 | xarray.backends = 61 | tiledb = tiledb.cf.xarray_engine.engine:TileDBXarrayBackendEntrypoint 62 | 63 | 64 | [flake8] 65 | ignore = E41,E203,E226,E302,E402,W503,B024 66 | max-line-length = 88 67 | exclude = docs/* ./.* 68 | max-complexity = 10 69 | per-file-ignores = __init__.py:F401 70 | 71 | [mypy] 72 | ignore_missing_imports = True 73 | exclude = conftest.py 74 | -------------------------------------------------------------------------------- /tiledb/cf/_utils.py: 
def check_valid_group(group_uri, ctx):
    """Raise a ValueError if the provided URI is not for a TileDB group.

    Parameters
    ----------
    group_uri
        URI of the TileDB object to check.
    ctx
        TileDB context used to query the object type.

    Raises
    ------
    ValueError
        If the object at ``group_uri`` is not a TileDB group.
    """
    object_type = tiledb.object_type(group_uri, ctx=ctx)
    if object_type != "group":
        # Fix: original message read "is no a valid TileDB group".
        raise ValueError(
            f"Cannot open group at URI '{group_uri}'. TileDB object with "
            f"type '{object_type}' is not a valid TileDB group."
        )


def get_array_key(
    key: Optional[Union[Dict[str, str], str]], array_name
) -> Optional[str]:
    """Returns a key for the array with name ``array_name``.

    Parameters
    ----------
    key
        If not ``None``, encryption key, or dictionary of encryption keys, to
        decrypt arrays. If a dictionary, the key is looked up by array name;
        arrays with no entry get ``None``.
    array_name
        Name of the array to decrypt.

    Returns
    -------
    Optional[str]
        Key for the array with name ``array_name``.
    """
    return key.get(array_name) if isinstance(key, dict) else key


def get_array_uri(group_uri: str, array_name: str) -> str:
    """Returns a URI for an array with name ``array_name`` inside a group at URI
    ``group_uri``.

    This method is only needed for creating relative arrays before adding them
    to a group.

    Parameters
    ----------
    group_uri
        URI of the group containing the array.
    array_name
        Name of the array.

    Returns
    -------
    str
        Array URI of an array with name ``array_name`` inside a group at URI
        ``group_uri``.
    """
    return os.path.join(group_uri, array_name)


def safe_set_metadata(meta, key, value):
    """Set a metadata item, converting numpy values to types that TileDB
    metadata supports.

    Numpy arrays are stored as tuples of their elements and numpy scalars as
    one-element tuples; all other values are stored unchanged. (The original
    docstring claimed errors were caught as warnings; no error handling is
    performed here.)
    """
    if isinstance(value, np.ndarray):
        value = tuple(value.tolist())
    elif isinstance(value, np.generic):
        value = (value.tolist(),)
    meta[key] = value
64 | """ 65 | return os.path.join(group_uri, array_name) 66 | 67 | 68 | def safe_set_metadata(meta, key, value): 69 | """Copy a metadata item to a TileDB array catching any errors as warnings.""" 70 | if isinstance(value, np.ndarray): 71 | value = tuple(value.tolist()) 72 | elif isinstance(value, np.generic): 73 | value = (value.tolist(),) 74 | meta[key] = value 75 | -------------------------------------------------------------------------------- /tiledb/cf/core/_shared_dim.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Optional, Tuple 4 | 5 | import numpy as np 6 | from tiledb.datatypes import DataType 7 | from typing_extensions import Self 8 | 9 | from .._utils import DType 10 | from .registry import RegisteredByNameMixin, Registry 11 | 12 | 13 | class SharedDim(RegisteredByNameMixin): 14 | """Definition for the name, domain and data type of a collection of dimensions. 15 | 16 | Parameters 17 | ---------- 18 | name 19 | The name of the shared dimension. 20 | domain 21 | The domain for the shared dimension. 22 | dtype 23 | The datatype of the shared dimension. 24 | registry 25 | If provided, a registry for the shared dimension. 
26 | """ 27 | 28 | def __init__( 29 | self, 30 | name: str, 31 | domain: Optional[Tuple[Optional[DType], Optional[DType]]], 32 | dtype: np.dtype, 33 | *, 34 | registry: Optional[Registry[Self]] = None, 35 | ): 36 | self._name = name 37 | self.domain = domain 38 | self.dtype = DataType.from_numpy(dtype).np_dtype 39 | super().__init__(name, registry) 40 | 41 | def __eq__(self, other): 42 | if not isinstance(other, self.__class__) or not isinstance( 43 | self, other.__class__ 44 | ): 45 | return False 46 | return ( 47 | self.name == other.name 48 | and self.domain == other.domain 49 | and self.dtype == other.dtype 50 | ) 51 | 52 | def __repr__(self) -> str: 53 | return ( 54 | f"SharedDim(name={self.name}, domain={self.domain}, dtype='{self.dtype!s}')" 55 | ) 56 | 57 | def html_input_summary(self) -> str: 58 | """Returns a HTML string summarizing the input for the dimension.""" 59 | return "" 60 | 61 | def html_output_summary(self) -> str: 62 | """Returns a string HTML summary of the :class:`SharedDim`.""" 63 | return f"name={self.name}, domain={self.domain}, dtype='{self.dtype!s}'" 64 | 65 | @property 66 | def is_index_dim(self) -> bool: 67 | """Returns ``True`` if this is an `index dimension` and ``False`` otherwise. 68 | 69 | An index dimension is a dimension with an integer data type and whose domain 70 | starts at 0. 
71 | """ 72 | if self.domain: 73 | return np.issubdtype(self.dtype, np.integer) and self.domain[0] == 0 74 | return False 75 | -------------------------------------------------------------------------------- /quarto-materials/tiledb-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /documentation/code-of-conduct.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: TileDB-CF Code of Conduct 3 | --- 4 | 5 | ## Introduction 6 | 7 | All participants in TileDB spaces are expected to adhere to high standards of 8 | professionalism in all interactions. These standards include, but are not 9 | limited to, the specific behaviors outlined below. Upholding these standards 10 | is fundamental to our commitment to create a welcoming, positive, and 11 | inclusive environment for everyone. We as contributors and maintainers 12 | pledge to making participation in our project and our community a 13 | harassment-free experience for everyone, regardless of age, body 14 | size, disability, ethnicity, gender identity and expression, level 15 | of experience, nationality, personal appearance, race, religion, or 16 | sexual identity and orientation. 17 | 18 | ### Our Standards 19 | 20 | Examples of behavior that contributes to creating a positive environment 21 | include: 22 | 23 | * Using welcoming and inclusive language 24 | * Being respectful of differing viewpoints and experiences 25 | * Gracefully accepting constructive criticism 26 | * Focusing on what is best for the community 27 | * Showing empathy towards other community members 28 | 29 | All of these serve to help the make the project better, but also serve to make 30 | the experience of participating in the project better as well. 
Please contact [conduct@tiledb.com](mailto:conduct@tiledb.com). All code of conduct
reports will be kept in confidence.
@pytest.mark.parametrize(
    "input_dtype,scale_factor,add_offset,output_dtype",
    (
        (np.int16, None, None, np.int16),
        (np.int16, np.float32(1), None, np.float32),
        (np.int16, None, np.float32(1), np.float32),
        (np.int16, np.float64(1), np.float32(1), np.float64),
    ),
)
def test_unpacked_dtype(input_dtype, scale_factor, add_offset, output_dtype):
    """Tests computing the unpacked data type for a NetCDF variable.

    The unpacked dtype is the result type after applying ``scale_factor``
    and/or ``add_offset`` (when set) to the raw variable values.
    """
    # diskless=True keeps the scratch NetCDF dataset in memory only.
    with netCDF4.Dataset("tmp.nc", diskless=True, mode="w") as dataset:
        dataset.createDimension("t", None)
        variable = dataset.createVariable("x", dimensions=("t",), datatype=input_dtype)
        if scale_factor is not None:
            variable.setncattr("scale_factor", scale_factor)
        if add_offset is not None:
            variable.setncattr("add_offset", add_offset)
        dtype = get_unpacked_dtype(variable)
        assert dtype == output_dtype


def test_unpacked_dtype_unsupported_dtype_error():
    """Tests attempting to unpack a NetCDF variable with a data type that does not
    support packing/unpacking."""
    with netCDF4.Dataset("tmp.nc", diskless=True, mode="w") as dataset:
        # "S1" (char) variables cannot be scaled/offset.
        variable = dataset.createVariable("x", dimensions=tuple(), datatype="S1")
        with pytest.raises(ValueError):
            get_unpacked_dtype(variable)


@pytest.mark.parametrize(
    "value, expected_result",
    (
        (np.float64(1), np.float64(1)),
        (np.array((1), dtype=np.float64), np.float64(1)),
        (np.array([1], dtype=np.int32), np.int32(1)),
    ),
)
def test_get_netcdf_metadata_number(value, expected_result):
    """Tests reading a numeric NetCDF attribute back as a scalar number."""
    key = "name"
    with netCDF4.Dataset("tmp.nc", diskless=True, mode="w") as dataset:
        dataset.setncattr(key, value)
        result = get_netcdf_metadata(dataset, key, is_number=True)
        assert result == expected_result


@pytest.mark.parametrize("value", (("",), (1, 2)))
def test_get_netcdf_metadata_number_with_warning(value):
    """Tests that requesting a non-scalar or non-numeric NetCDF attribute as a
    number issues a warning and returns ``None``."""
    key = "name"
    with netCDF4.Dataset("tmp.nc", diskless=True, mode="w") as dataset:
        dataset.setncattr(key, value)
        with pytest.warns(Warning):
            result = get_netcdf_metadata(dataset, key, is_number=True)
        assert result is None
from typing import Optional

import click
import numpy as np

from .netcdf_engine import from_netcdf


# Top-level command group for the `tiledb-cf` console script (wired up via the
# entry point in setup.cfg). NOTE: click uses a command's docstring as its CLI
# help text, so intent is documented in comments here to keep output unchanged.
@click.group()
def cli():
    pass


@cli.command("netcdf-convert")
@click.option(
    "-i",
    "--input-file",
    required=True,
    type=str,
    help="The path or URI to the NetCDF file that will be converted.",
)
@click.option(
    "-o",
    "--output-uri",
    required=True,
    type=str,
    help="The URI for the output TileDB group.",
)
@click.option(
    "--input-group-path",
    type=str,
    default="/",
    show_default=True,
    help="The path in the input NetCDF for the root group that will be converted.",
)
@click.option(
    "--recursive/--no-recursive",
    default=True,
    show_default=True,
    help="Recursively convert all groups contained in the input group path.",
)
@click.option(
    "--collect-attrs/--array-per-attr",
    default=True,
    show_default=True,
    help="Collect variables with the same dimensions into a single array.",
)
@click.option(
    "-k",
    "--output-key",
    type=str,
    default=None,
    show_default=True,
    help="Key for the generated TileDB arrays.",
)
@click.option(
    "--unlimited-dim-size",
    type=int,
    default=10000,
    show_default=True,
    help="Size to convert unlimited dimensions to.",
)
@click.option(
    "--dim-dtype",
    type=click.Choice(
        [
            "int8",
            "int16",
            "int32",
            "int64",
            "uint8",
            "uint16",
            "uint32",
            "uint64",
        ]
    ),
    default="uint64",
    show_default=True,
    help="The data type for TileDB dimensions created from converted NetCDF.",
)
def netcdf_convert(
    input_file: str,
    output_uri: str,
    input_group_path: str,
    recursive: bool,
    output_key: Optional[str],
    unlimited_dim_size: int,
    dim_dtype: str,
    collect_attrs: bool,
):
    """Converts a NetCDF input file to nested TileDB groups."""
    # Options not exposed on the command line (context, per-variable/dimension
    # tiles, coordinate-to-dimension conversion) are pinned to fixed values
    # here; dim_dtype is converted from its CLI string form to a numpy dtype.
    from_netcdf(
        input_file=input_file,
        output_uri=output_uri,
        input_group_path=input_group_path,
        recursive=recursive,
        output_key=output_key,
        output_ctx=None,
        unlimited_dim_size=unlimited_dim_size,
        dim_dtype=np.dtype(dim_dtype),
        tiles_by_var=None,
        tiles_by_dims=None,
        coords_to_dims=False,
        collect_attrs=collect_attrs,
    )
class TestOpenDatasetTimestep:
    """Tests opening a TileDB array in xarray at specific timestamps."""

    @pytest.fixture(scope="class")
    def tiledb_uri(self, tmpdir_factory):
        """Creates a TileDB array and returns the URI.

        Writes four fragments at timestamps 1-4, each updating the attribute
        data plus an array-level and a variable-level metadata item, so tests
        can check which fragments a timestamped open includes.
        """
        uri = str(tmpdir_factory.mktemp("output").join("empty_array"))
        tiledb.Array.create(
            uri,
            tiledb.ArraySchema(
                domain=tiledb.Domain(
                    tiledb.Dim("x", domain=(0, 3), dtype=np.uint64),
                ),
                attrs=[tiledb.Attr("z", dtype=np.float64)],
            ),
        )
        # t=1: all zeros; t=2: all ones; t=3 and t=4: single-cell updates.
        with tiledb.open(uri, mode="w", timestamp=1) as array:
            array[:] = np.zeros((4))
            array.meta["global"] = 0
            array.meta["__tiledb_attr.z.variable"] = 0
        with tiledb.open(uri, mode="w", timestamp=2) as array:
            array[:] = np.ones((4))
            array.meta["global"] = 1
            array.meta["__tiledb_attr.z.variable"] = 1
        with tiledb.open(uri, mode="w", timestamp=3) as array:
            array[1] = 2
            array.meta["global"] = 2
            array.meta["__tiledb_attr.z.variable"] = 2
        with tiledb.open(uri, mode="w", timestamp=4) as array:
            array[2] = 3
            array.meta["global"] = 3
            array.meta["__tiledb_attr.z.variable"] = 3
        return uri

    def test_variable_data_timestamp_int(self, tiledb_uri):
        # An integer timestamp reads everything written up to that time.
        result = xr.open_dataset(tiledb_uri, timestamp=2, engine="tiledb")
        expected = xr.Dataset({"z": xr.DataArray(np.ones((4)), dims=("x",))})
        xr.testing.assert_equal(result, expected)

    def test_variable_metadata_timestamp_int(self, tiledb_uri):
        result = xr.open_dataset(tiledb_uri, timestamp=2, engine="tiledb")
        assert result["z"].attrs["variable"] == 1

    def test_global_metadata_timestamp_int(self, tiledb_uri):
        result = xr.open_dataset(tiledb_uri, timestamp=2, engine="tiledb")
        assert result.attrs["global"] == 1

    def test_variable_data_timestamp_tuple(self, tiledb_uri):
        # A (start, end) tuple reads only the fragments in that range.
        result = xr.open_dataset(tiledb_uri, timestamp=(2, 3), engine="tiledb")
        expected = xr.Dataset({"z": xr.DataArray(np.array((1, 2, 1, 1)), dims=("x",))})
        xr.testing.assert_equal(result, expected)

    def test_variable_metadata_timestamp_tuple(self, tiledb_uri):
        result = xr.open_dataset(tiledb_uri, timestamp=(2, 3), engine="tiledb")
        assert result["z"].attrs["variable"] == 2

    def test_global_metadata_timestamp_tuple(self, tiledb_uri):
        result = xr.open_dataset(tiledb_uri, timestamp=(2, 3), engine="tiledb")
        assert result.attrs["global"] == 2
class TestDimMetadata:
    """Tests for reading and writing dimension-scoped metadata via DimMetadata."""

    @pytest.fixture(scope="class")
    def array_uri(self, tmpdir_factory):
        """Create a one-dimension, one-attribute dense array and return its URI."""
        uri = str(tmpdir_factory.mktemp("test_array"))
        dim = tiledb.Dim(name="dim", domain=(0, 0), tile=1, dtype=np.int32)
        schema = tiledb.ArraySchema(
            domain=tiledb.Domain(dim),
            attrs=[tiledb.Attr(name="attr", dtype=np.int32)],
        )
        tiledb.Array.create(uri, schema)
        with tiledb.DenseArray(uri, mode="w") as arr:
            # Array-level item that must stay invisible to the dim-scoped view.
            arr.meta["array_key"] = "array_value"
        return uri

    def test_modify_metadata(self, array_uri):
        """Set, delete, and read back dimension metadata across timestamps."""
        with tiledb.DenseArray(array_uri, mode="r") as arr:
            assert len(DimMetadata(arr.meta, "dim")) == 0
        with tiledb.DenseArray(array_uri, mode="w", timestamp=1) as arr:
            dim_meta = DimMetadata(arr.meta, "dim")
            dim_meta["key0"] = "dim_value"
            dim_meta["key1"] = 10
            dim_meta["key2"] = 0.1
        with tiledb.DenseArray(array_uri, mode="w", timestamp=2) as arr:
            del DimMetadata(arr.meta, "dim")["key2"]
        with tiledb.DenseArray(array_uri, mode="r") as arr:
            dim_meta = DimMetadata(arr.meta, "dim")
            assert sorted(dim_meta.keys()) == ["key0", "key1"]
            assert "key0" in dim_meta
            assert dim_meta["key0"] == "dim_value"

    def test_open_from_index(self, array_uri):
        """A dimension may also be referenced by its integer index."""
        with tiledb.DenseArray(array_uri, mode="r") as arr:
            DimMetadata(arr.meta, 0)

    def test_attr_not_in_array_exception(self, array_uri):
        """An unknown dimension name raises KeyError."""
        with pytest.raises(KeyError):
            with tiledb.DenseArray(array_uri, mode="w") as arr:
                _ = DimMetadata(arr.meta, "x")

    def test_contains_not_string_exception(self, array_uri):
        """Membership tests require a string key."""
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="r") as arr:
                _ = 1 in DimMetadata(arr.meta, "dim")

    def test_delitem_not_string_exception(self, array_uri):
        """Deletion requires a string key."""
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="w") as arr:
                del DimMetadata(arr.meta, "dim")[1]

    def test_getitem_not_string_exception(self, array_uri):
        """Item access requires a string key."""
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="r") as arr:
                _ = DimMetadata(arr.meta, "dim")[1]

    def test_setitem_not_string_exception(self, array_uri):
        """Item assignment requires a string key."""
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="w") as arr:
                DimMetadata(arr.meta, "dim")[1] = "value"
class TestAttrMetadata:
    """Tests for reading and writing attribute-scoped metadata via AttrMetadata."""

    @pytest.fixture(scope="class")
    def array_uri(self, tmpdir_factory):
        # One-dimensional dense array with a single attribute. An array-level
        # metadata item is written so tests can confirm the attribute-scoped
        # view does not expose it.
        array_uri = str(tmpdir_factory.mktemp("test_array"))
        schema = tiledb.ArraySchema(
            domain=tiledb.Domain(
                tiledb.Dim(name="dim", domain=(0, 0), tile=1, dtype=np.int32)
            ),
            attrs=[
                tiledb.Attr(name="attr", dtype=np.int32),
            ],
        )
        tiledb.Array.create(array_uri, schema)
        with tiledb.DenseArray(array_uri, mode="w") as array:
            array.meta["array_key"] = "array_value"
        return array_uri

    def test_modify_metadata(self, array_uri):
        # The array-level item written by the fixture is not visible here.
        with tiledb.DenseArray(array_uri, mode="r") as array:
            meta = AttrMetadata(array.meta, "attr")
            assert len(meta) == 0
        # Write three items at timestamp 1, delete one at timestamp 2, then
        # verify the surviving keys and values on a fresh read.
        with tiledb.DenseArray(array_uri, mode="w", timestamp=1) as array:
            meta = AttrMetadata(array.meta, "attr")
            meta["key0"] = "attribute_value"
            meta["key1"] = 10
            meta["key2"] = 0.1
        with tiledb.DenseArray(array_uri, mode="w", timestamp=2) as array:
            meta = AttrMetadata(array.meta, "attr")
            del meta["key2"]
        with tiledb.DenseArray(array_uri, mode="r") as array:
            meta = AttrMetadata(array.meta, "attr")
            assert set(meta.keys()) == set(["key0", "key1"])
            assert "key0" in meta
            assert meta["key0"] == "attribute_value"

    def test_open_from_index(self, array_uri):
        # An attribute may also be referenced by its integer index.
        with tiledb.DenseArray(array_uri, mode="r") as array:
            AttrMetadata(array.meta, 0)

    def test_attr_not_in_array_exception(self, array_uri):
        # An unknown attribute name raises KeyError.
        with pytest.raises(KeyError):
            with tiledb.DenseArray(array_uri, mode="w") as array:
                _ = AttrMetadata(array.meta, "x")

    def test_contains_not_string_exception(self, array_uri):
        # Metadata keys must be strings; membership test with an int raises.
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="r") as array:
                meta = AttrMetadata(array.meta, "attr")
                _ = 1 in meta

    def test_delitem_not_string_exception(self, array_uri):
        # Deleting with a non-string key raises.
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="w") as array:
                meta = AttrMetadata(array.meta, "attr")
                del meta[1]

    def test_getitem_not_string_exception(self, array_uri):
        # Reading with a non-string key raises.
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="r") as array:
                meta = AttrMetadata(array.meta, "attr")
                _ = meta[1]

    def test_setitem_not_string_exception(self, array_uri):
        # Assigning with a non-string key raises.
        with pytest.raises(TypeError):
            with tiledb.DenseArray(array_uri, mode="w") as array:
                meta = AttrMetadata(array.meta, "attr")
                meta[1] = "value"
#!/bin/sh

# Git pre-commit hook: run isort, black, flake8, and mypy over the staged
# Python files, then check the staged diff for whitespace errors.

# run_test <command> <name> <fix_msg>
# Run <command>; on failure, prompt whether to continue the commit. If the
# user declines, print <fix_msg> and abort with the command's exit status.
run_test() {
    name=$2
    fix_msg=$3
    echo "* Running ${name}.. "
    echo "..................."
    $1
    status=$?
    echo "..................."
    if [ $status -ne 0 ]; then
        read -r -p "..failed. Would you like continue with commit? [y/N] " response
        case "$response" in
            [yY][eE][sS]|[yY])
                echo "Continuing with tests .."
                ;;
            *)
                # Quoted to avoid word splitting of multi-word messages.
                echo "$fix_msg"
                exit $status
        esac
    else
        echo "..passed"
    fi
}

# Staged (added/modified) Python files only; deleted files are excluded.
python_files=$(git diff --cached --name-only --diff-filter=AM | grep '\.py$')

if [ ! -z "${python_files}" ]; then
    # run isort
    run_test "isort --check --diff ${python_files}" \
        "isort" \
        "Try running 'isort .' and add changes to git."
    # run black
    run_test "black --check ${python_files}" \
        "black" \
        "Try running 'black .' and add changes to git."
    # run flake8
    run_test "flake8 ${python_files}" "flake8" ""
    # run mypy
    run_test "mypy ${python_files}" "mypy" ""
fi

# Check for whitespace errors against HEAD, or against the empty tree for the
# initial commit.
if git rev-parse --verify HEAD >/dev/null 2>&1
then
    against=HEAD
else
    # Initial commit: diff against an empty tree object
    against=$(git hash-object -t tree /dev/null)
fi

# exec replaces the shell, so nothing after this line ever runs. A previous
# version of this hook accidentally contained a complete duplicate copy of
# itself after this point; that unreachable dead code has been removed.
exec git diff-index --check --cached $against --
To install tiledb-cf with additional dependencies use:

```bash
pip install tiledb-cf[<extras>]
```
58 | 59 | * **Array**: A set of attributes and dimensions that can be queried together: 60 | 61 | * **Dimensions**: The dimensions along with their domains orient a multi-dimensional space of cells. A dimension is defined by its name, domain, and data type along with additional data that specifies data storage and compression. The dimension values is called the cell coordinates. There can be any number of dimensions in an array. 62 | 63 | * **Attributes**: In each cell in the logical layout, TileDB stores a tuple comprised of any number of attributes, each of any data type (fixed- or variable-sized). 64 | 65 | * **Metadata**: This is (typically small) key-value data associated with an array or a group. 66 | 67 | * **Dimension labels** (experimental): Dimension labels store either increasing of decreasing data in a one-dimensional TileDB array that can be used to indirectly query other dimensions. 68 | -------------------------------------------------------------------------------- /tests/core/test_array_metadata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | import tiledb 5 | from tiledb.cf import ArrayMetadata 6 | 7 | 8 | class TestArrayMetadata: 9 | @pytest.fixture(scope="class") 10 | def array_uri(self, tmpdir_factory): 11 | array_uri = str(tmpdir_factory.mktemp("test_array")) 12 | schema = tiledb.ArraySchema( 13 | domain=tiledb.Domain( 14 | tiledb.Dim(name="dim", domain=(0, 0), tile=1, dtype=np.int32) 15 | ), 16 | attrs=[ 17 | tiledb.Attr(name="attr", dtype=np.int32), 18 | ], 19 | ) 20 | tiledb.Array.create(array_uri, schema) 21 | with tiledb.DenseArray(array_uri, mode="w") as array: 22 | array.meta["__tiledb_attr.attr"] = "attribute value" 23 | array.meta["__tiledb_dim.dim"] = "dimension value" 24 | return array_uri 25 | 26 | def test_modify_metadata(self, array_uri): 27 | with tiledb.DenseArray(array_uri, mode="r") as array: 28 | meta = ArrayMetadata(array.meta) 29 | assert 
len(meta) == 0 30 | assert "__tiledb_attr.attr" not in meta 31 | assert "__tiledb_dim.dim" not in meta 32 | with tiledb.DenseArray(array_uri, mode="w", timestamp=1) as array: 33 | meta = ArrayMetadata(array.meta) 34 | meta["key0"] = "array value" 35 | meta["key1"] = 10 36 | meta["key2"] = 0.1 37 | with tiledb.DenseArray(array_uri, mode="w", timestamp=2) as array: 38 | meta = ArrayMetadata(array.meta) 39 | del meta["key2"] 40 | with tiledb.DenseArray(array_uri, mode="r") as array: 41 | meta = ArrayMetadata(array.meta) 42 | assert set(meta.keys()) == set(["key0", "key1"]) 43 | assert "key0" in meta 44 | assert meta["key0"] == "array value" 45 | 46 | def test_delitem_attr_key_exception(self, array_uri): 47 | with pytest.raises(KeyError): 48 | with tiledb.DenseArray(array_uri, mode="w") as array: 49 | meta = ArrayMetadata(array.meta) 50 | del meta["__tiledb_attr.attr"] 51 | 52 | def test_delitem_dim_key_exeception(self, array_uri): 53 | with pytest.raises(KeyError): 54 | with tiledb.DenseArray(array_uri, mode="w") as array: 55 | meta = ArrayMetadata(array.meta) 56 | del meta["__tiledb_dim.dim"] 57 | 58 | def test_getitem_attr_key_exception(self, array_uri): 59 | with pytest.raises(KeyError): 60 | with tiledb.DenseArray(array_uri, mode="r") as array: 61 | meta = ArrayMetadata(array.meta) 62 | _ = meta["__tiledb_attr.attr"] 63 | 64 | def test_getitem_dim_key_exception(self, array_uri): 65 | with pytest.raises(KeyError): 66 | with tiledb.DenseArray(array_uri, mode="r") as array: 67 | meta = ArrayMetadata(array.meta) 68 | _ = meta["__tiledb_dim.dim"] 69 | 70 | def test_setitem_attr_key_exception(self, array_uri): 71 | with pytest.raises(KeyError): 72 | with tiledb.DenseArray(array_uri, mode="w") as array: 73 | meta = ArrayMetadata(array.meta) 74 | meta["__tiledb_attr.a"] = "value" 75 | 76 | def test_setitem_dim_key_exception(self, array_uri): 77 | with pytest.raises(KeyError): 78 | with tiledb.DenseArray(array_uri, mode="w") as array: 79 | meta = ArrayMetadata(array.meta) 
80 | meta["__tiledb_dim.a"] = "value" 81 | -------------------------------------------------------------------------------- /_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | output-dir: docs 4 | render: 5 | - "documentation/index.md" 6 | - "documentation/" 7 | - "examples/" 8 | 9 | format: 10 | html: 11 | toc: true 12 | theme: 13 | light: [flatly, "quarto-materials/tiledb.scss"] 14 | mainfont: Helvetica 15 | fontsize: 1rem 16 | linkcolor: "#4d9fff" 17 | code-copy: true 18 | code-overflow: scroll 19 | css: "quarto-materials/tiledb.css" 20 | 21 | quartodoc: 22 | title: "API Reference" 23 | package: tiledb 24 | dir: "documentation/api" 25 | sections: 26 | - title: "Core" 27 | desc: "" 28 | contents: 29 | - cf.create_group 30 | - cf.open_group_array 31 | - cf.ArrayMetadata 32 | - cf.AttrMetadata 33 | - cf.DimMetadata 34 | - cf.DataspaceCreator 35 | 36 | - title: "NetCDF Support" 37 | desc: "" 38 | contents: 39 | - cf.from_netcdf 40 | - cf.NetCDF4ConverterEngine 41 | - cf.netcdf_engine.NetCDF4CoordToDimConverter 42 | - cf.netcdf_engine.NetCDF4DimToDimConverter 43 | - cf.netcdf_engine.NetCDF4ScalarToDimConverter 44 | - cf.netcdf_engine.NetCDF4ArrayConverter 45 | - cf.netcdf_engine.NetCDF4DomainConverter 46 | - cf.netcdf_engine.NetCDF4ToDimConverter 47 | - cf.netcdf_engine.NetCDF4VarToAttrConverter 48 | 49 | - title: "Xarray Support" 50 | desc: "" 51 | contents: 52 | - cf.from_xarray 53 | - cf.create_group_from_xarray 54 | - cf.copy_data_from_xarray 55 | - cf.copy_metadata_from_xarray 56 | 57 | website: 58 | favicon: "images/favicon.ico" 59 | site-url: https://tiledb-inc.github.io/tiledb-quarto-template/ 60 | repo-url: https://github.com/TileDB-Inc/tiledb-quarto-template 61 | 62 | repo-actions: [issue] 63 | page-navigation: true 64 | navbar: 65 | background: light 66 | logo: "quarto-materials/tiledb-logo.png" 67 | collapse-below: lg 68 | left: 69 | - text: "Home page" 70 | href: 
"https://tiledb.com" 71 | - text: "Login" 72 | href: "https://cloud.tiledb.com/auth/login" 73 | - text: "Contact us" 74 | href: "https://tiledb.com/contact" 75 | - text: "Repo" 76 | href: "https://github.com/TileDB-Inc/tiledb-cf-py" 77 | 78 | sidebar: 79 | - style: "floating" 80 | collapse-level: 2 81 | align: left 82 | contents: 83 | 84 | - section: "Overview" 85 | contents: 86 | - href: "documentation/index.md" 87 | 88 | - section: "TileDB-CF Core" 89 | contents: 90 | - href: "documentation/core.md" 91 | - href: "documentation/tiledb-cf-spec.md" 92 | - section: "Examples" 93 | contents: 94 | - href: "examples/core/group-basics.ipynb" 95 | 96 | - section: "NetCDF Engine" 97 | contents: 98 | - href: "documentation/netcdf-engine.md" 99 | - section: "Examples" 100 | contents: 101 | - href: "examples/netcdf_engine/netcdf-to-tiledb-basics.ipynb" 102 | - href: "examples/netcdf_engine/netcdf-to-tiledb-set-max-fragment-size.ipynb" 103 | - href: "examples/netcdf_engine/netcdf-to-tiledb-set-tiles.ipynb" 104 | 105 | - section: "Xarray Engine" 106 | contents: 107 | - href: "documentation/xarray-engine.md" 108 | - section: "Examples" 109 | contents: 110 | - href: "examples/xarray_engine/tiledb-xarray-basics.ipynb" 111 | - href: "examples/xarray_engine/tiledb-xarray-partially-filled-arrays.ipynb" 112 | 113 | - section: "API Reference" 114 | contents: 115 | - href: "documentation/api/index.qmd" 116 | 117 | - section: "Contributing" 118 | contents: 119 | - href: "documentation/contributing.md" 120 | - href: "documentation/code-of-conduct.md" 121 | -------------------------------------------------------------------------------- /tiledb/cf/core/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping 4 | from typing import Dict, Optional, Union 5 | 6 | import tiledb 7 | 8 | from .._utils import check_valid_group, get_array_key, get_array_uri 9 | 10 | 11 | def 
def create_group(
    uri: str,
    group_schema: Mapping[str, tiledb.ArraySchema],
    *,
    key: Optional[Union[Dict[str, str], str]] = None,
    ctx: Optional[tiledb.Ctx] = None,
    config: Optional[tiledb.Config] = None,
    append: bool = False,
):
    """Creates a TileDB group with arrays at relative locations inside the group.

    All arrays in the group will be added at a relative URI that matches the
    array name.

    Parameters
    ----------
    uri
        Uniform resource identifier for TileDB group or array.
    group_schema
        A mapping from array names to array schemas to add to the group.
    key
        An encryption key or dict from array names to encryption keys.
    ctx
        If not ``None``, TileDB context wrapper for a TileDB storage manager.
    config
        Unused by this function; accepted for backward compatibility.
    append
        If ``True``, add arrays from the provided group schema to an already
        existing group. The names for the arrays in the group schema cannot
        already exist in the group being appended to.

    Raises
    ------
    ValueError
        If ``append=True`` and an array name in ``group_schema`` already
        exists in the group.
    """
    if append:
        # Appending requires an existing, valid group and no name collisions.
        check_valid_group(uri, ctx=ctx)
        with tiledb.Group(uri, ctx=ctx) as group:
            for array_name in group_schema:
                if array_name in group:
                    raise ValueError(
                        f"Cannot append to group. Array `{array_name}` already exists."
                    )
    else:
        tiledb.group_create(uri, ctx)
    with tiledb.Group(uri, mode="w", ctx=ctx) as group:
        for array_name, array_schema in group_schema.items():
            tiledb.Array.create(
                uri=get_array_uri(uri, array_name),
                schema=array_schema,
                key=get_array_key(key, array_name),
                ctx=ctx,
            )
            # Register the newly created array as a named, relative member.
            group.add(uri=array_name, name=array_name, relative=True)


def open_group_array(
    group: tiledb.Group,
    *,
    array: Optional[str] = None,
    attr: Optional[str] = None,
    **kwargs,
) -> tiledb.Array:
    """Opens an array in a group either by specifying the name of the array or
    the name of an attribute in the array.

    If only providing the attribute, there must be exactly one array in the
    group with an attribute with the requested name.

    Parameters
    ----------
    group
        The tiledb group to open the array in.
    array
        If not ``None``, the name of the array to open. Overrides attr if both
        are provided.
    attr
        If not ``None``, open the array that contains this attr. Attr must be
        in only one of the group arrays.
    **kwargs: dict, optional
        Keyword arguments to pass to the ``tiledb.open`` method.

    Returns
    -------
    tiledb.Array:
        An array opened in the specified mode

    Raises
    ------
    KeyError
        If ``attr`` is given and no array in the group contains it.
    ValueError
        If ``attr`` is ambiguous (found in multiple arrays) or if neither
        ``array`` nor ``attr`` is provided.
    """
    # Get the item in the group that either has the requested array name or
    # requested attribute.
    if array is not None:
        item = group[array]
    elif attr is not None:
        # Collect every array member whose schema contains the attribute.
        arrays = tuple(
            item
            for item in group
            if item.type == tiledb.libtiledb.Array
            and tiledb.ArraySchema.load(item.uri).has_attr(attr)
        )
        if not arrays:
            raise KeyError(f"No attribute with name '{attr}' found.")
        if len(arrays) > 1:
            # BUG FIX: the original listed every member of the group here
            # (`for item in group`); list only the arrays that actually
            # contain the requested attribute.
            raise ValueError(
                f"The array must be specified when opening an attribute that "
                f"exists in multiple arrays in a group. Arrays with attribute "
                f"'{attr}' include: {list(item.name for item in arrays)}."
            )
        item = arrays[0]
    else:
        raise ValueError(
            "Cannot open array. Either an array or attribute must be specified."
        )
    return tiledb.open(item.uri, attr=attr, **kwargs)
42 | """ 43 | 44 | def __init__( 45 | self, 46 | base: SharedDim, 47 | *, 48 | tile: Optional[Union[int, float]] = None, 49 | filters: Optional[tiledb.FilterList] = None, 50 | registry: Optional[DimRegistry] = None, 51 | ): 52 | self._base = base 53 | self.tile = tile 54 | self.filters = filters 55 | self._registry = registry 56 | 57 | def __repr__(self): 58 | filters_str = "" 59 | if self.filters: 60 | filters_str = ", filters=FilterList([" 61 | for dim_filter in self.filters: 62 | filters_str += repr(dim_filter) + ", " 63 | filters_str += "])" 64 | return f"DimCreator({repr(self._base)}, tile={self.tile}{filters_str})" 65 | 66 | @property 67 | def base(self) -> SharedDim: 68 | """Shared definition for the dimensions name, domain, and dtype.""" 69 | return self._base 70 | 71 | @property 72 | def dtype(self) -> np.dtype: 73 | """The numpy dtype of the values and domain of the dimension.""" 74 | return self._base.dtype 75 | 76 | @property 77 | def domain(self) -> Optional[Tuple[Optional[DType], Optional[DType]]]: 78 | """The (inclusive) interval on which the dimension is valid.""" 79 | return self._base.domain 80 | 81 | def html_summary(self) -> str: 82 | """Returns a string HTML summary of the :class:`DimCreator`.""" 83 | filters_str = "" 84 | if self.filters: 85 | filters_str = ", filters=FilterList([" 86 | for dim_filter in self.filters: 87 | filters_str += repr(dim_filter) + ", " 88 | filters_str += "])" 89 | return ( 90 | f"{self._base.html_input_summary()} → tiledb.Dim(" 91 | f"{self._base.html_output_summary()}, tile={self.tile}{filters_str})" 92 | ) 93 | 94 | @property 95 | def name(self) -> str: 96 | """Name of the dimension.""" 97 | return self._base.name 98 | 99 | def set_writer_data( 100 | self, 101 | dim_data: Union[np.ndarray, FieldData], 102 | *, 103 | writer_index: Optional[int] = None, 104 | ): 105 | """Set dimension data on a fragment writer 106 | 107 | Parameters 108 | ---------- 109 | dim_data 110 | The dimension data to set. 
111 | writer_index 112 | The index of the fragment writer to set the data on. 113 | """ 114 | if self._registry is None: 115 | raise ValueError("Dimension creator is not registered to an array.") 116 | data = create_field_data(dim_data, self.dtype) 117 | self._registry.set_writer_data(writer_index, self.name, data) 118 | 119 | def to_tiledb(self, ctx: Optional[tiledb.Ctx] = None) -> tiledb.Dim: 120 | """Returns a `tiledb.Dim` using the creator properties. 121 | 122 | Parameters 123 | ---------- 124 | ctx 125 | If not ``None``, TileDB context wrapper for a TileDB storage manager. 126 | 127 | Returns 128 | ------- 129 | tiledb.Dim 130 | A tiledb dimension with the set properties. 131 | """ 132 | return tiledb.Dim( 133 | name=self.name, 134 | domain=self.domain, 135 | tile=self.tile, 136 | filters=self.filters, 137 | dtype=self.dtype, 138 | ctx=ctx, 139 | ) 140 | -------------------------------------------------------------------------------- /documentation/netcdf-engine.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: TileDB-CF NetCDF Engine 3 | --- 4 | 5 | ## NetCDF Data Model 6 | The NetCDF data model is a common choice for multi-dimensional data, especially in the climate and weather space. NetCDF and TileDB use over lapping terminology to refer to concepts in their respective data model. 7 | 8 | A complete description of the NetCDF data model can be found at the [UCAR website](https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_data_model.html). 9 | 10 | A NetCDF file consists of **groups**, **dimensions**, **variables**, and **attributes**. Each NetCDF file has at least one root group that contains all other objects. Additional subgroups can be added to heirarchically organize the data. 11 | 12 | * **Dimensions**: A dimension is a name-size pair that describes an axis of a multi-dimension array. The size of the dimension may be "unlimited" (allowed to grow). 
The NetCDF dimension is roughly ananlogous to a TileDB dimension in a dense TileDB array. 13 | 14 | * **Variables**: A variable is a multi-dimensional array with a NetCDF dimension associated to each axis of the array. The size of the dimensions must match the shape of the multi-dimensional array. A NetCDF variable is roughly equivalent to either a TileDB attribute in a sparse or dense TileDB array or a TileDB dimension in a sparse TileDB array. 15 | 16 | * **Attribute**: An attribute is a key-value pair that is associated with either a group or variable. Attributes are used to store (typically small) metadata. NetCDF attributes are roughly equivalent to TileDB metadata. 17 | 18 | * **Group**: A NetCDF group is a collection of dimensions, variables, and attributes. A simple NetCDF group might map to a TileDB array. A more complex group would need to be mapped to a TileDB group. 19 | 20 | 21 | ## NetCDF-to-TileDB Compatibility 22 | 23 | The TileDB-CF package provides an interface for generating TileDB groups from NetCDF datasets using the TileDB-CF Dataspace convention. The CF Dataspace model supports the classic NetCDF-4 data model by mapping: 24 | 25 | * NetCDF groups to TileDB groups; 26 | * NetCDF dimensions to TileDB dimensions; 27 | * NetCDF variables to TileDB attributes or TileDB dimensions; 28 | * NetCDF attributes to TileDB group or array metadata. 29 | 30 | Some features and use cases do not directly transfer or may need to be modified before use in TileDB. 31 | 32 | * **Coordinates**: In NetCDF, it is a common convention to name a one-dimensional variable with the same name as its dimension to signify it as a "coordinate" or independent variable other variables are defined on. In TileDB, a variable and dimension in the same array cannot have the same name. This can be handled by renaming either the dimension or the variable when copying to TileDB. 
33 | 34 | * **Unlimited Dimensions**: TileDB can support unlimited dimensions by creating the domain on a dimension larger than the initial data. The domain must be set at creation time, and cannot be modified after array creation. 35 | 36 | * **Compound data types**: As of TileDB version 2.16, compound data types are not directly supported in TileDB. Compound data types can be broken into their constituent parts; however, this breaks storage locality. Variable, opaque, and string data types are supported. 37 | 38 | 39 | ## Programmatic Interface 40 | 41 | The `NetCDFConverterEngine` is a configurable tool for ingesting data from NetCDF into TileDB. The class can be manually constructed, or it can be auto-generated from a NetCDF file or group. 42 | 43 | ## Command-Line Interface 44 | 45 | TileDB-CF provides a command line interface to the NetCDF converter engine. It contains the following options: 46 | 47 | ```bash 48 | Usage: tiledb-cf netcdf-convert [OPTIONS] 49 | 50 | Converts a NetCDF input file to nested TileDB groups. 51 | 52 | Options: 53 | -i, --input-file TEXT The path or URI to the NetCDF file that will be converted. [required] 54 | 55 | -o, --output-uri TEXT The URI for the output TileDB group. [required] 56 | 57 | --input-group-path TEXT The path in the input NetCDF for the root group that will be converted. [default: /] 58 | 59 | --recursive / --no-recursive Recursively convert all groups contained in the input group path. [default: True] 60 | 61 | -k, --output-key TEXT Key for the generated TileDB arrays. 62 | 63 | --unlimited-dim-size INTEGER Size to convert unlimited dimensions to. [default: 10000] 64 | 65 | --dim-dtype [int8|int16|int32|int64|uint8|uint16|uint32|uint64] 66 | The data type for TileDB dimensions created from converted NetCDF. [default: uint64] 67 | 68 | --help Show this message and exit. 
"""Functions for converting NetCDF files to TileDB."""

from pathlib import Path
from typing import Dict, Optional, Sequence, Union

import numpy as np

import tiledb

# Default dtype used for TileDB dimensions generated from NetCDF dimensions.
_DEFAULT_INDEX_DTYPE = np.dtype("uint64")


def from_netcdf(
    input_file: Union[str, Path],
    output_uri: str,
    input_group_path: str = "/",
    recursive: bool = True,
    output_key: Optional[str] = None,
    output_ctx: Optional[tiledb.Ctx] = None,
    unlimited_dim_size: int = 10000,
    dim_dtype: np.dtype = _DEFAULT_INDEX_DTYPE,
    tiles_by_var: Optional[Dict[str, Dict[str, Optional[Sequence[int]]]]] = None,
    tiles_by_dims: Optional[
        Dict[str, Dict[Sequence[str], Optional[Sequence[int]]]]
    ] = None,
    coords_to_dims: bool = False,
    collect_attrs: bool = True,
    unpack_vars: bool = False,
    offsets_filters: Optional[tiledb.FilterList] = None,
    attrs_filters: Optional[tiledb.FilterList] = None,
    copy_metadata: bool = True,
):
    """Converts a NetCDF input file to nested TileDB CF dataspaces.

    See ``tiledb.cf.NetCDF4ConverterEngine`` for more information on the backend
    converter engine used for the conversion.

    Parameters
    ----------
    input_file
        The input NetCDF file to generate the converter engine from.
    output_uri
        The uniform resource identifier for the TileDB group to be created.
    input_group_path:
        The path to the NetCDF group to copy data from. Use ``'/'`` for the root group.
    recursive
        If ``True``, recursively convert groups in a NetCDF file. Otherwise, only
        convert group provided.
    output_key
        If not ``None``, encryption key to decrypt arrays.
    output_ctx
        If not ``None``, TileDB context wrapper for a TileDB storage manager.
    unlimited_dim_size:
        The size of the domain for TileDB dimensions created from unlimited NetCDF
        dimensions.
    dim_dtype
        The numpy dtype for TileDB dimensions.
    tiles_by_var
        A map from the name of a NetCDF variable to the tiles of the dimensions of the
        variable in the generated TileDB array.
    tiles_by_dims
        A map from the name of NetCDF dimensions defining a variable to the tiles of
        those dimensions in the generated TileDB array.
    coords_to_dims
        If ``True``, convert the NetCDF coordinate variable into a TileDB dimension for
        sparse arrays. Otherwise, convert the coordinate dimension into a TileDB
        dimension and the coordinate variable into a TileDB attribute.
    collect_attrs
        If ``True``, store all attributes with the same dimensions in the same array.
        Otherwise, store each attribute in a scalar array.
    unpack_vars
        Unpack NetCDF variables with NetCDF attributes ``scale_factor`` or
        ``add_offset`` using the transformation ``scale_factor * value + add_offset``.
    offsets_filters
        Default filters for all offsets for variable attributes and dimensions.
    attrs_filters
        Default filters for all attributes.
    copy_metadata
        If ``True`` copy NetCDF group and variable attributes to TileDB metadata. If
        ``False`` do not copy metadata.
    """
    # Imported locally to avoid requiring netCDF4 unless this entry point is used.
    from .converter import NetCDF4ConverterEngine, open_netcdf_group

    # Normalize the output URI so appending a NetCDF group path (which starts
    # with '/') does not produce a double slash.
    output_uri = output_uri if not output_uri.endswith("/") else output_uri[:-1]

    if tiles_by_var is None:
        tiles_by_var = {}
    if tiles_by_dims is None:
        tiles_by_dims = {}

    def recursive_convert(netcdf_group):
        # Convert one NetCDF group to a TileDB group, then recurse into
        # subgroups when requested.
        converter = NetCDF4ConverterEngine.from_group(
            netcdf_group,
            unlimited_dim_size,
            dim_dtype,
            tiles_by_var.get(netcdf_group.path),
            tiles_by_dims.get(netcdf_group.path),
            coords_to_dims=coords_to_dims,
            collect_attrs=collect_attrs,
            unpack_vars=unpack_vars,
            offsets_filters=offsets_filters,
            attrs_filters=attrs_filters,
        )
        group_uri = output_uri + netcdf_group.path
        converter.convert_to_group(
            group_uri,
            output_key,
            output_ctx,
            input_netcdf_group=netcdf_group,
            copy_metadata=copy_metadata,
        )
        if recursive:
            for subgroup in netcdf_group.groups.values():
                recursive_convert(subgroup)

    with open_netcdf_group(
        input_file=input_file,
        group_path=input_group_path,
    ) as dataset:
        recursive_convert(dataset)
line-height: 1.74 !important; 38 | border-radius: 0 !important; 39 | } 40 | 41 | li { 42 | line-height: 2.06 !important; 43 | } 44 | 45 | .navbar-dark { 46 | background-image: url('./quarto-materials/Background-tdb-header.jpg'); 47 | background-position: center; 48 | background-size: cover; 49 | } 50 | 51 | .navbar-nav .nav-link { 52 | border-radius: 8px !important; 53 | } 54 | 55 | .nav-link { 56 | padding-left: 1rem !important; 57 | padding-right: 1rem !important; 58 | } 59 | 60 | .navbar-dark .navbar-nav .nav-link { 61 | color: white; 62 | font-weight: 500; 63 | &:hover { 64 | color: rgba(255, 255, 255, 1) !important; 65 | background-color: rgba(255, 255, 255, 0.1); 66 | } 67 | transition: all 0.3s ease; 68 | } 69 | 70 | .navbar-dark .navbar-nav .nav-link:focus { 71 | color: white !important; 72 | } 73 | 74 | .navbar #quarto-search.type-overlay .aa-Autocomplete svg.aa-SubmitIcon { 75 | color: white; 76 | } 77 | 78 | .sidebar.sidebar-navigation .active, 79 | .sidebar.sidebar-navigation .show > .nav-link { 80 | color: #2c4396 !important; 81 | } 82 | 83 | .sidebar-item { 84 | margin-bottom: 0.75em; 85 | } 86 | 87 | .sidebar-section { 88 | margin-top: 0.75em; 89 | padding-bottom: 0.75em; 90 | padding-left: 1.25em; 91 | border-left: 1px solid rgba(0, 0, 0, 0.1); 92 | } 93 | 94 | .sidebar-item-container a { 95 | color: rgba(0, 0, 0, 0.6) !important; 96 | &:hover { 97 | color: rgba(0, 0, 0, 0.8) !important; 98 | } 99 | &:visited { 100 | color: rgba(0, 0, 0, 0.4) !important; 101 | } 102 | transition: 0.3s ease color; 103 | } 104 | 105 | .sidebar.sidebar-navigation .active { 106 | font-weight: 600; 107 | color: rgba(0, 0, 0, 0.7) !important; 108 | background-color: transparent !important; 109 | padding-left: 0px !important; 110 | } 111 | 112 | .figure-img { 113 | border-radius: 6px; 114 | box-shadow: 0 6px 18px 0 rgba(0, 0, 0, 0.06); 115 | border: 1px solid rgba(0, 0, 0, 0.1); 116 | } 117 | 118 | .nav-page span { 119 | color: rgba(0, 0, 0, 0.8) !important; 120 | } 121 | 
122 | .nav-page i { 123 | transition: color 0.3s ease; 124 | } 125 | 126 | code, 127 | p code:not(.sourceCode), 128 | li code:not(.sourceCode), 129 | kbd, 130 | pre { 131 | font-size: 14px !important; 132 | padding: 0.3em 0.8em !important; 133 | border-radius: 6px !important; 134 | } 135 | 136 | .sidebar nav[role="doc-toc"] > ul li a { 137 | padding: 0.6rem 0.8rem; 138 | font-weight: 500; 139 | color: rgba(0, 0, 0, 0.6); 140 | &:hover { 141 | color: rgba(0, 0, 0, 0.8); 142 | } 143 | transition: 0.3s ease color; 144 | } 145 | 146 | .sidebar nav[role="doc-toc"] ul > li > ul > li > a.active { 147 | color: rgba(0, 0, 0, 0.8) !important; 148 | } 149 | 150 | .sidebar nav[role="doc-toc"] ul > li > a.active { 151 | color: rgba(0, 0, 0, 0.8) !important; 152 | } 153 | 154 | .sidebar.sidebar-navigation > * { 155 | padding-top: 1.6em !important; 156 | } 157 | 158 | .nav-tabs .nav-link:hover, 159 | .nav-tabs .nav-link:focus { 160 | color: rgba(0, 0, 0, 0.6); 161 | } 162 | 163 | .navbar-dark .navbar-toggler { 164 | color: white !important; 165 | border-color: white !important; 166 | background-color: rgba(255, 255, 255, 0.94); 167 | } 168 | 169 | // Search form 170 | 171 | .aa-DetachedContainer { 172 | background-color: transparent !important; 173 | } 174 | 175 | .aa-DetachedContainer--modal { 176 | border-radius: 12px !important; 177 | } 178 | 179 | .aa-DetachedFormContainer { 180 | border-bottom: none !important; 181 | background-color: rgba(255, 255, 255, 0.7); 182 | backdrop-filter: blur(5px); 183 | padding: 12px !important; 184 | } 185 | 186 | .aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input, 187 | .aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input { 188 | height: calc(2.5em + (0.1rem + 2px)) !important; 189 | } 190 | 191 | .aa-Autocomplete .aa-Form, 192 | .aa-DetachedFormContainer .aa-Form { 193 | background-color: rgba(255, 255, 255, 0.9) !important; 194 | box-shadow: 0 2px 6px 0 rgba(0, 0, 0, 0.4); 195 | border: none !important; 196 | border-radius: 8px 
"""Tests for ``tiledb.cf.create_group`` and ``tiledb.cf.open_group_array``."""

import numpy as np
import pytest

import tiledb
from tiledb.cf import create_group, open_group_array

# Shared dimensions used by every example schema below: 4-element unsigned
# integer dimensions compressed with Zstd.
_row = tiledb.Dim(
    name="rows",
    domain=(1, 4),
    tile=4,
    dtype=np.uint64,
    filters=tiledb.FilterList([tiledb.ZstdFilter()]),
)
_col = tiledb.Dim(
    name="cols",
    domain=(1, 4),
    tile=4,
    dtype=np.uint64,
    filters=tiledb.FilterList([tiledb.ZstdFilter()]),
)


# Example attributes and schemas. Attribute "c" appears in both the sparse
# schema (_array_schema_2) and a dense schema (_array_schema_3) so tests can
# exercise the ambiguous-attribute lookup path.
_attr_a = tiledb.Attr(name="a", dtype=np.uint64)
_attr_b = tiledb.Attr(name="b", dtype=np.float64)
_attr_c = tiledb.Attr(name="c", dtype=np.dtype("U"))
_array_schema_1 = tiledb.ArraySchema(
    domain=tiledb.Domain(_row, _col),
    attrs=[_attr_a],
)
_array_schema_2 = tiledb.ArraySchema(
    domain=tiledb.Domain(_row),
    sparse=True,
    attrs=[_attr_b, _attr_c],
)
_array_schema_3 = tiledb.ArraySchema(
    domain=tiledb.Domain(_row, _col),
    attrs=[_attr_c],
)


class TestCreateGroup:
    """Tests for creating a TileDB group with ``create_group``."""

    _array_schemas = {"A1": _array_schema_1, "A2": _array_schema_2}
    _key = None  # no encryption key

    @pytest.fixture(scope="class")
    def group_uri(self, tmpdir_factory):
        """Creates a TileDB Group from a mapping of array schemas and returns its URI."""
        uri = str(tmpdir_factory.mktemp("group1"))
        ctx = None
        create_group(uri, self._array_schemas, key=self._key, ctx=ctx)
        return uri

    def test_array_schemas(self, group_uri):
        """The created object is a group and each member has the requested schema."""
        uri = group_uri
        assert tiledb.object_type(uri) == "group"
        for name, schema in self._array_schemas.items():
            with tiledb.Group(uri) as group:
                assert tiledb.ArraySchema.load(group[name].uri) == schema


class TestGroupWithArrays:
    """Tests for ``open_group_array`` on a manually constructed group."""

    # Dense data written to array "A1" in the fixture below.
    _A1_data = np.array(
        ([1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]), dtype=np.uint64
    )

    @pytest.fixture(scope="class")
    def group_uri(self, tmpdir_factory):
        """Creates a group containing arrays A1 (with data written), A2, and A3."""
        uri = str(tmpdir_factory.mktemp("simple_group"))
        tiledb.group_create(uri)
        tiledb.Array.create(uri + "/A1", _array_schema_1)
        with tiledb.DenseArray(uri + "/A1", mode="w") as array:
            array[:] = self._A1_data
        tiledb.Array.create(uri + "/A2", _array_schema_2)
        tiledb.Array.create(uri + "/A3", _array_schema_3)
        # Register the arrays as named group members using relative URIs.
        with tiledb.Group(uri, mode="w") as group:
            group.add(uri="A1", name="A1", relative=True)
            group.add(uri="A2", name="A2", relative=True)
            group.add(uri="A3", name="A3", relative=True)
        # An unrelated directory inside the group directory; not a member.
        filesystem = tiledb.VFS()
        filesystem.create_dir(uri + "/empty_dir")
        return uri

    def test_open_array_from_group(self, group_uri):
        """Opens a group member by array name for reading."""
        with tiledb.Group(group_uri) as group:
            with open_group_array(group, array="A1") as array:
                assert isinstance(array, tiledb.Array)
                assert array.mode == "r"
                np.testing.assert_equal(array[:, :]["a"], self._A1_data)

    def test_open_attr(self, group_uri):
        """Opens the unique array containing attribute "a"."""
        with tiledb.Group(group_uri) as group:
            with open_group_array(group, attr="a") as array:
                assert isinstance(array, tiledb.Array)
                assert array.mode == "r"
                np.testing.assert_equal(array[:, :], self._A1_data)

    def test_no_array_with_attr_exception(self, group_uri):
        """Raises ``KeyError`` when no array contains the requested attribute."""
        with tiledb.Group(group_uri) as group:
            with pytest.raises(KeyError):
                open_group_array(group, attr="bad_name")

    def test_ambiguous_array_exception(self, group_uri):
        """Raises ``ValueError`` when multiple arrays contain the attribute."""
        with tiledb.Group(group_uri) as group:
            with pytest.raises(ValueError):
                open_group_array(group, attr="c")

    def test_no_values_error(self, group_uri):
        """Raises ``ValueError`` when neither an array nor an attribute is given."""
        with tiledb.Group(group_uri) as group:
            with pytest.raises(ValueError):
                open_group_array(group)


def test_append_group(tmpdir):
    """Appending to an existing group adds the new arrays and keeps the old."""
    uri = str(tmpdir.mkdir("append_group_test"))
    create_group(uri, {"A1": _array_schema_1})
    create_group(uri, {"A2": _array_schema_2}, append=True)
    with tiledb.Group(uri) as group:
        assert group["A1"].type == tiledb.libtiledb.Array
        assert group["A2"].type == tiledb.libtiledb.Array
        a1_schema = tiledb.ArraySchema.load(group["A1"].uri)
        a2_schema = tiledb.ArraySchema.load(group["A2"].uri)
        assert a1_schema == _array_schema_1
        assert a2_schema == _array_schema_2


def test_append_group_array_exists_error(tmpdir):
    """Raises ``ValueError`` when appending an array name that already exists."""
    uri = str(tmpdir.mkdir("append_group_test"))
    create_group(uri, {"A1": _array_schema_1})
    with pytest.raises(ValueError):
        create_group(uri, {"A1": _array_schema_1}, append=True)
following python packages are installed: netCDF4, numpy, tiledb, and tiledb-cf" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import netCDF4\n", 36 | "import numpy as np\n", 37 | "import tiledb\n", 38 | "import tiledb.cf" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Set names for the output generated by the example.\n", 48 | "output_dir = \"output/netcdf-to-tiledb-set-max-fragment-size\"\n", 49 | "netcdf_file = f\"{output_dir}/simple1.nc\"\n", 50 | "array_uri = f\"{output_dir}/simple_copy_chunks\"" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# Reset output folder\n", 60 | "import os\n", 61 | "import shutil\n", 62 | "\n", 63 | "shutil.rmtree(output_dir, ignore_errors=True)\n", 64 | "os.mkdir(output_dir)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "with netCDF4.Dataset(netcdf_file, mode=\"w\") as dataset:\n", 74 | " dataset.setncatts({\"title\": \"Simple dataset for examples\"})\n", 75 | " dataset.createDimension(\"x\", 8)\n", 76 | " dataset.createDimension(\"y\", 8)\n", 77 | " dataset.createDimension(\"z\", 8)\n", 78 | " f = dataset.createVariable(\"f\", np.int64, (\"x\", \"y\", \"z\"))\n", 79 | " f[:, :, :] = np.reshape(np.arange(512), (8, 8, 8))\n", 80 | "print(f\"Created example NetCDF file `{netcdf_file}`.\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# Create NetCDF4 converter and print output\n", 90 | "converter = tiledb.cf.NetCDF4ConverterEngine.from_file(netcdf_file)\n", 91 | "converter" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | 
"metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "# Set max_fragment_shape for array\n", 101 | "converter.get_array_creator(\"array0\").domain_creator.max_fragment_shape = (4, 8, 2)\n", 102 | "converter" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Run conversion (using `convert_to_array` since there is only 1 array in the group)\n", 112 | "# Consolidate fragment metadata (recommended for copying multiple fragments)\n", 113 | "converter.convert_to_array(array_uri)\n", 114 | "tiledb.consolidate(\n", 115 | " array_uri, config=tiledb.Config({\"sm.consolidation.mode\": \"fragment_meta\"})\n", 116 | ")" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# View fragments information to confirm multiple separate chunks were copied\n", 126 | "fragment_info = tiledb.FragmentInfoList(array_uri)\n", 127 | "print(f\"Number of fragments: {len(fragment_info)}\")\n", 128 | "for frag in fragment_info:\n", 129 | " print(\n", 130 | " f\"Fragment {frag.num}: nonempty_domain={frag.nonempty_domain}, has_consolidated_metadata={frag.has_consolidated_metadata}\"\n", 131 | " )" 132 | ] 133 | } 134 | ], 135 | "metadata": { 136 | "interpreter": { 137 | "hash": "022b808d35d9188bc114e3dbdd31978ae285e77fefec36d9e39c13a87da8d5e5" 138 | }, 139 | "kernelspec": { 140 | "display_name": "Python 3.9.0 64-bit ('cf-3.9.0': pyenv)", 141 | "name": "python3" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 3 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | "nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython3", 153 | "version": "3.11.3" 154 | }, 155 | "orig_nbformat": 4 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 2 159 | } 160 | 
import os
from dataclasses import dataclass, field
from typing import Any, Dict, Optional, Sequence, Tuple

import numpy as np
import pytest


@dataclass(frozen=True)
class NetCDFSingleGroupExample:
    """Dataclass that holds values required to generate NetCDF test cases.

    Constructing an instance writes the NetCDF file to disk as a side effect
    (see ``__post_init__``).

    name: name of the test case; also used as the NetCDF file stem
    directory_path: directory the NetCDF file is written into
    dimension_args: sequence of arguments required to create NetCDF4 dimensions
    variable_kwargs: sequence of keyword arguments required to create NetCDF4
        variables
    variable_data: dict of variable data by variable name
    variable_metadata: dict of variable metadata key-value pairs by variable name
    group_metadata: group metadata key-value pairs
    """

    name: str
    directory_path: str
    dimension_args: Sequence[Tuple[str, Optional[int]]]
    variable_kwargs: Sequence[Dict[str, Any]]
    variable_data: Dict[str, np.ndarray]
    variable_metadata: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    group_metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Writing the example file requires netCDF4; skip the dependent test
        # when the package is not installed.
        netCDF4 = pytest.importorskip("netCDF4")
        with netCDF4.Dataset(self.filepath, mode="w") as dataset:
            if self.group_metadata:
                dataset.setncatts(self.group_metadata)
            for dim_args in self.dimension_args:
                dataset.createDimension(*dim_args)
            for var_kwargs in self.variable_kwargs:
                variable = dataset.createVariable(**var_kwargs)
                variable[...] = self.variable_data[variable.name]
                if variable.name in self.variable_metadata:
                    variable.setncatts(self.variable_metadata[variable.name])

    @property
    def filepath(self) -> str:
        """Full path of the NetCDF file for this example.

        Joins with ``os.path.join`` on ``str(directory_path)`` so the property
        works whether ``directory_path`` is a plain string (as annotated) or
        the py.path.local object supplied by ``tmpdir_factory``. The original
        ``self.directory_path.join(...)`` only worked for the latter.
        """
        return os.path.join(str(self.directory_path), f"{self.name}.nc")


@pytest.fixture(scope="session")
def simple1_netcdf_file(tmpdir_factory):
    """NetCDF file with dimension ``row(8)`` and variable ``x1(row)``."""
    directory_path = tmpdir_factory.mktemp("sample_netcdf")
    example = NetCDFSingleGroupExample(
        "simple1",
        directory_path,
        dimension_args=[
            ("row", 8),
        ],
        variable_kwargs=[
            {"varname": "x1", "datatype": np.float64, "dimensions": ("row",)},
        ],
        variable_data={"x1": np.linspace(1.0, 4.0, 8)},
    )
    return example


@pytest.fixture(scope="session")
def simple2_netcdf_file(tmpdir_factory):
    """NetCDF file with two variables ``x1`` and ``x2 = x1**2`` on ``row(8)``."""
    directory_path = tmpdir_factory.mktemp("sample_netcdf")
    xdata = np.linspace(0.0, 1.0, 8)
    example = NetCDFSingleGroupExample(
        "simple2",
        directory_path,
        dimension_args=[("row", 8)],
        variable_kwargs=[
            {"varname": "x1", "datatype": np.float64, "dimensions": ("row",)},
            {"varname": "x2", "datatype": np.float64, "dimensions": ("row",)},
        ],
        variable_data={"x1": xdata, "x2": xdata**2},
        group_metadata={"name": "simple2"},
    )
    return example


@pytest.fixture(scope="session")
def group1_netcdf_file(tmpdir_factory):
    """Sample NetCDF file with groups

    root:
        dimensions: row(8)
        variables: x1(row) = np.linspace(-1.0, 1.0, 8)
        group1:
            variables: x2(row) = 2 * np.linspace(-1.0, 1.0, 8)
            group2:
                dimensions: col(4)
                variables: y1(col) = np.linspace(-1.0, 1.0, 4)
        group3:
            dimensions: row(4), col(4)
            variables:
                A1[:, :] = np.outer(y1, y1)
                A2[:, :] = np.zeros((4,4), dtype=np.float64)
                A3[:, :] = np.identity(4)
    """
    netCDF4 = pytest.importorskip("netCDF4")
    filepath = str(tmpdir_factory.mktemp("sample_netcdf").join("simple1.nc"))
    x = np.linspace(-1.0, 1.0, 8)
    y = np.linspace(-1.0, 1.0, 4)
    with netCDF4.Dataset(filepath, mode="w") as dataset:
        dataset.createDimension("row", 8)
        x1 = dataset.createVariable("x1", np.float64, ("row",))
        x1[:] = x
        group1 = dataset.createGroup("group1")
        x2 = group1.createVariable("x2", np.float64, ("row",))
        x2[:] = 2.0 * x
        group2 = group1.createGroup("group2")
        group2.createDimension("col", 4)
        y1 = group2.createVariable("y1", np.float64, ("col",))
        y1[:] = y
        group3 = dataset.createGroup("group3")
        group3.createDimension("row", 4)
        group3.createDimension("col", 4)
        A1 = group3.createVariable("A1", np.float64, ("row", "col"))
        A2 = group3.createVariable("A2", np.float64, ("row", "col"))
        A3 = group3.createVariable("A3", np.int32, ("row", "col"))
        A1[:, :] = np.outer(y, y)
        A2[:, :] = np.zeros((4, 4), dtype=np.float64)
        A3[:, :] = np.identity(4)
    return filepath


@pytest.fixture
def netcdf_test_case(tmpdir_factory, request):
    """Creates a NetCDF file from ``request.param`` and returns the example
    object describing it (the original docstring incorrectly described the
    return value as a tuple of filepath stem, filepath, and attributes)."""
    return NetCDFSingleGroupExample(
        **request.param,
        directory_path=tmpdir_factory.mktemp("sample_netcdf"),
    )
from __future__ import annotations

from typing import Optional, Sequence, Union

import numpy as np
from tiledb.datatypes import DataType
from typing_extensions import Protocol

import tiledb

from .._utils import DType
from ._fragment_writer import FragmentWriter
from .registry import RegisteredByNameMixin
from .source import FieldData, create_field_data


class AttrRegistry(Protocol):
    """Protocol for a registry that stores attribute creators by name."""

    def __delitem__(self, name: str):
        """Delete the element with the provided name."""

    def __getitem__(self, name: str) -> AttrCreator:
        """Get the element with the provided name."""

    def __setitem__(self, name: str, value: AttrCreator):
        """Set the element with the provided name to the provided value."""

    def set_writer_data(
        self, writer_index: Optional[int], attr_name: str, data: FieldData
    ):
        """Set the data on the requested fragment writer."""

    def rename(self, old_name: str, new_name: str):
        """Rename an element of the registry.

        If the rename fails, the registry should be left unchanged.
        """


class AttrCreator(RegisteredByNameMixin):
    """Creator for a TileDB attribute.

    Parameters
    ----------
    name
        Name of the attribute that will be created.
    dtype
        The datatype of the attribute that will be created.
    fill
        Optional fill value for the attribute that will be created.
    var
        Specifies if the attribute that will be created will be variable length
        (automatic for byte/strings).
    nullable
        Specifies if the attribute that will be created will be nullable using
        validity tiles.
    filters
        Filter pipeline to apply to the attribute.
    registry
        Registry for this attribute creator.
    fragment_writers
        Fragment writers for this attribute creator.

    Attributes
    ----------
    dtype: np.dtype
        Numpy dtype of the attribute.
    fill: int or float or str, optional
        Fill value for unset cells.
    var: bool
        Specifies if the attribute is variable length (automatic for
        byte/strings).
    nullable: bool
        Specifies if the attribute is nullable using validity tiles.
    filters: tiledb.FilterList, optional
        Specifies compression filters for the attribute.
    """

    def __init__(
        self,
        name: str,
        dtype: np.dtype,
        *,
        fill: Optional[DType] = None,
        var: bool = False,
        nullable: bool = False,
        filters: Optional[tiledb.FilterList] = None,
        registry: Optional[AttrRegistry] = None,
        fragment_writers: Optional[Sequence[FragmentWriter]] = None,
    ):
        # Normalize the input to a canonical numpy dtype via TileDB's type map.
        self.dtype = DataType.from_numpy(dtype).np_dtype
        self.fill = fill
        self.var = var
        self.nullable = nullable
        self.filters = filters
        self._fragment_writers = fragment_writers
        # Name registration is handled by RegisteredByNameMixin.
        super().__init__(name, registry)

    def __repr__(self):
        filters_str = f", filters=FilterList({self.filters})" if self.filters else ""
        return (
            f"AttrCreator(name={self.name}, dtype='{self.dtype!s}', var={self.var}, "
            f"nullable={self.nullable}{filters_str})"
        )

    def html_summary(self) -> str:
        """Returns a string HTML summary of the ``AttrCreator``."""
        filters_str = f", filters=FilterList({self.filters})" if self.filters else ""
        return (
            f" → tiledb.Attr(name={self.name}, dtype='{self.dtype!s}', "
            f"var={self.var}, nullable={self.nullable}{filters_str})"
        )

    def set_writer_data(
        self,
        attr_data: Union[np.ndarray, FieldData],
        *,
        writer_index: Optional[int] = None,
    ):
        """Set attribute data to the specified fragment writer.

        Parameters
        ----------
        attr_data
            Attribute data to add to the writer.
        writer_index
            The index of the fragment writer to add to.

        Raises
        ------
        ValueError
            If this attribute creator is not registered to an array.
        """
        if self._registry is None:
            raise ValueError("Attribute creator is not registered to an array.")
        # Coerce raw numpy input into FieldData using this attribute's dtype.
        data = create_field_data(attr_data, self.dtype)
        self._registry.set_writer_data(writer_index, self.name, data)

    def to_tiledb(self, ctx: Optional[tiledb.Ctx] = None) -> tiledb.Attr:
        """Returns a :class:`tiledb.Attr` using the current properties.

        Parameters
        ----------
        ctx
            If not ``None``, TileDB context wrapper for a TileDB storage manager.

        Returns
        -------
        tiledb.Attr
            An attribute with the properties defined in this attribute creator.
        """
        return tiledb.Attr(
            name=self.name,
            dtype=self.dtype,
            fill=self.fill,
            var=self.var,
            nullable=self.nullable,
            filters=self.filters,
            ctx=ctx,
        )


# --- file boundary: tests/xarray_engine/conftest.py ---

import numpy as np
import pytest

import tiledb


@pytest.fixture
def create_tiledb_group_example(tmpdir):
    """Builds a TileDB group of two arrays plus the matching expected Dataset."""
    xr = pytest.importorskip("xarray")

    # Define data
    data = {
        "pressure": np.linspace(
            -1.0, 1.0, num=32, endpoint=True, dtype=np.float64
        ).reshape(8, 4),
        "count": np.arange(0, 32, dtype=np.int32).reshape(8, 4),
    }

    # Create expected dataset
    expected = xr.Dataset(
        data_vars={
            "pressure": xr.DataArray(
                data=data["pressure"],
                dims=["time", "x"],
                attrs={"long_name": "example float data"},
            ),
"count": xr.DataArray( 28 | data=data["count"], 29 | dims=["time", "x"], 30 | attrs={"long_name": "example int data"}, 31 | ), 32 | }, 33 | attrs={"global_1": "value1", "global_2": "value2"}, 34 | ) 35 | 36 | # Create the TileDB group 37 | group_uri = str(tmpdir.join("tiledb_group_example_1")) 38 | count_uri = str(tmpdir.join("count_array")) 39 | pressure_uri = str(tmpdir.join("pressure_array")) 40 | count_schema = tiledb.ArraySchema( 41 | domain=tiledb.Domain( 42 | tiledb.Dim(name="time", domain=(0, 7), tile=4, dtype=np.int32), 43 | tiledb.Dim(name="x", domain=(0, 3), tile=4, dtype=np.int32), 44 | ), 45 | sparse=False, 46 | attrs=[tiledb.Attr(name="count", dtype=np.int32)], 47 | ) 48 | 49 | pressure_schema = tiledb.ArraySchema( 50 | domain=tiledb.Domain( 51 | tiledb.Dim(name="time", domain=(0, 7), tile=4, dtype=np.int32), 52 | tiledb.Dim(name="x", domain=(0, 3), tile=4, dtype=np.int32), 53 | ), 54 | sparse=False, 55 | attrs=[tiledb.Attr(name="pressure", dtype=np.float64)], 56 | ) 57 | 58 | # Create and write to arrays. 59 | tiledb.Array.create(count_uri, count_schema) 60 | with tiledb.open(count_uri, mode="w") as array: 61 | array[:, :] = data["count"] 62 | array.meta["__tiledb_attr.count.long_name"] = "example int data" 63 | tiledb.Array.create(pressure_uri, pressure_schema) 64 | with tiledb.open(pressure_uri, mode="w") as array: 65 | array[:, :] = data["pressure"] 66 | array.meta["__tiledb_attr.pressure.long_name"] = "example float data" 67 | 68 | # Create group and add arrays and metadata. 
69 | tiledb.Group.create(group_uri) 70 | with tiledb.Group(group_uri, mode="w") as group: 71 | group.add(pressure_uri) 72 | group.add(count_uri) 73 | group.meta["global_1"] = "value1" 74 | group.meta["global_2"] = "value2" 75 | return group_uri, expected 76 | 77 | 78 | @pytest.fixture 79 | def create_tiledb_example(tmpdir): 80 | xr = pytest.importorskip("xarray") 81 | # Define data 82 | float_data = np.linspace( 83 | -1.0, 1.0, num=32, endpoint=True, dtype=np.float64 84 | ).reshape(8, 4) 85 | int_data = np.arange(0, 32, dtype=np.int32).reshape(8, 4) 86 | # Create expected dataset 87 | expected = xr.Dataset( 88 | data_vars={ 89 | "pressure": xr.DataArray( 90 | data=float_data, 91 | dims=["time", "x"], 92 | attrs={"long_name": "example float data"}, 93 | ), 94 | "count": xr.DataArray( 95 | data=int_data, 96 | dims=["time", "x"], 97 | attrs={"long_name": "example int data"}, 98 | ), 99 | }, 100 | attrs={"global_1": "value1", "global_2": "value2"}, 101 | ) 102 | array_uri = str(tmpdir.join("tiledb_example_1")) 103 | schema = tiledb.ArraySchema( 104 | domain=tiledb.Domain( 105 | tiledb.Dim(name="time", domain=(0, 7), tile=4, dtype=np.int32), 106 | tiledb.Dim(name="x", domain=(0, 3), tile=4, dtype=np.int32), 107 | ), 108 | sparse=False, 109 | attrs=[ 110 | tiledb.Attr(name="count", dtype=np.int32), 111 | tiledb.Attr(name="pressure", dtype=np.float64), 112 | ], 113 | ) 114 | tiledb.Array.create(array_uri, schema) 115 | with tiledb.open(array_uri, mode="w") as array: 116 | array[:, :] = { 117 | "pressure": float_data, 118 | "count": int_data, 119 | } 120 | array.meta["global_1"] = "value1" 121 | array.meta["global_2"] = "value2" 122 | array.meta["__tiledb_attr.pressure.long_name"] = "example float data" 123 | array.meta["__tiledb_attr.count.long_name"] = "example int data" 124 | return array_uri, expected 125 | 126 | 127 | @pytest.fixture 128 | def create_tiledb_datetime_example(tmpdir): 129 | xr = pytest.importorskip("xarray") 130 | data = np.linspace(-1.0, 20.0, num=16, 
endpoint=True, dtype=np.float64) 131 | date = np.arange(np.datetime64("2000-01-01"), np.datetime64("2000-01-17")) 132 | # Create expected dataset 133 | expected = xr.Dataset( 134 | data_vars={"temperature": xr.DataArray(data=data, dims="date")}, 135 | coords={"date": date}, 136 | ) 137 | # Create TileDB array 138 | array_uri = str(tmpdir.join("tiledb_example_2")) 139 | schema = tiledb.ArraySchema( 140 | domain=tiledb.Domain( 141 | tiledb.Dim( 142 | name="date", 143 | domain=(np.datetime64("2000-01-01"), np.datetime64("2000-01-16")), 144 | tile=np.timedelta64(4, "D"), 145 | dtype=np.datetime64("", "D"), 146 | ), 147 | ), 148 | attrs=[tiledb.Attr(name="temperature", dtype=np.float64)], 149 | ) 150 | tiledb.DenseArray.create(array_uri, schema) 151 | with tiledb.DenseArray(array_uri, mode="w") as array: 152 | array[:] = {"temperature": data} 153 | return array_uri, expected 154 | -------------------------------------------------------------------------------- /tests/netcdf_engine/test_netcdf4_converter_array.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | import tiledb 5 | from tiledb.cf.core._shared_dim import SharedDim 6 | 7 | netCDF4 = pytest.importorskip("netCDF4") 8 | netcdf_engine = pytest.importorskip("tiledb.cf.netcdf_engine") 9 | 10 | 11 | class TestAttrsFilters: 12 | """Collection of tests for setting default attribute filters.""" 13 | 14 | def test_default_filter(self): 15 | """Tests new attribute filter is set to the attrs_filters value if the 16 | ``filters`` parameter is not specified.""" 17 | attrs_filters = tiledb.FilterList([tiledb.ZstdFilter()]) 18 | with netCDF4.Dataset("example.nc", mode="w", diskless=True) as dataset: 19 | dim = dataset.createDimension("row", 64) 20 | var = dataset.createVariable("x", np.float64, ("row",)) 21 | shared_dims = [ 22 | netcdf_engine.NetCDF4DimToDimConverter.from_netcdf(dim, None, np.uint64) 23 | ] 24 | converter = 
netcdf_engine.NetCDF4ArrayConverter( 25 | dim_order=("row",), shared_dims=shared_dims, attrs_filters=attrs_filters 26 | ) 27 | converter.add_var_to_attr_converter(var) 28 | assert converter.attr_creator("x").filters == attrs_filters 29 | 30 | def test_overwrite_default_filters(self): 31 | """Tests new attribute filter is set to the provided ``filters`` parameter when 32 | ``filters is not ``None``.""" 33 | attrs_filters = tiledb.FilterList([tiledb.ZstdFilter()]) 34 | new_filters = tiledb.FilterList([tiledb.GzipFilter(level=5)]) 35 | with netCDF4.Dataset("example.nc", mode="w", diskless=True) as dataset: 36 | dim = dataset.createDimension("row", 64) 37 | var = dataset.createVariable("x", np.float64, ("row",)) 38 | shared_dims = [ 39 | netcdf_engine.NetCDF4DimToDimConverter.from_netcdf(dim, None, np.uint64) 40 | ] 41 | converter = netcdf_engine.NetCDF4ArrayConverter( 42 | dim_order=("row",), shared_dims=shared_dims, attrs_filters=attrs_filters 43 | ) 44 | converter.add_var_to_attr_converter(var, filters=new_filters) 45 | assert converter.attr_creator("x").filters == new_filters 46 | 47 | 48 | def test_remove_dim_creator_front(): 49 | """Tests removing a dimension in the front of the domain.""" 50 | shared_dims = [ 51 | SharedDim("x0", (0, 7), np.uint32), 52 | SharedDim("x1", (0, 7), np.uint32), 53 | SharedDim("x2", (0, 4), np.uint32), 54 | ] 55 | creator = netcdf_engine.NetCDF4ArrayConverter( 56 | dim_order=("x0", "x1", "x2"), shared_dims=shared_dims 57 | ) 58 | creator.domain_creator.remove_dim_creator("x0") 59 | dim_names = tuple(dim_creator.name for dim_creator in creator.domain_creator) 60 | assert dim_names == ("x1", "x2") 61 | 62 | 63 | def test_remove_dim_creator_back(): 64 | """Tests removing a dimension in the back of the domain.""" 65 | shared_dims = [ 66 | SharedDim("x1", (0, 7), np.uint32), 67 | SharedDim("x2", (0, 7), np.uint32), 68 | SharedDim("x3", (0, 4), np.uint32), 69 | ] 70 | creator = netcdf_engine.NetCDF4ArrayConverter( 71 | dim_order=("x1", 
"x2", "x3"), shared_dims=shared_dims 72 | ) 73 | creator.domain_creator.remove_dim_creator("x3") 74 | dim_names = tuple(dim_creator.name for dim_creator in creator.domain_creator) 75 | assert dim_names == ("x1", "x2") 76 | 77 | 78 | def test_remove_dim_creator_middle(): 79 | """Tests removing a dimension in the middle of the domain.""" 80 | shared_dims = [ 81 | SharedDim("x0", (0, 7), np.uint32), 82 | SharedDim("x1", (0, 7), np.uint32), 83 | SharedDim("x2", (0, 4), np.uint32), 84 | ] 85 | creator = netcdf_engine.NetCDF4ArrayConverter( 86 | dim_order=("x0", "x1", "x2"), shared_dims=shared_dims 87 | ) 88 | creator.domain_creator.remove_dim_creator("x1") 89 | dim_names = tuple(dim_creator.name for dim_creator in creator.domain_creator) 90 | assert dim_names == ("x0", "x2") 91 | 92 | 93 | def test_remove_dim_creator_key_error(): 94 | """Tests key error when removing a dimension by name.""" 95 | shared_dims = [ 96 | SharedDim("x0", (0, 7), np.uint32), 97 | SharedDim("x1", (0, 7), np.uint32), 98 | SharedDim("x2", (0, 4), np.uint32), 99 | ] 100 | creator = netcdf_engine.NetCDF4ArrayConverter( 101 | dim_order=("x0", "x1", "x2"), shared_dims=shared_dims 102 | ) 103 | with pytest.raises(KeyError): 104 | creator.domain_creator.remove_dim_creator("x4") 105 | 106 | 107 | def test_set_max_fragment_shape_error(): 108 | """Tests raising an error when attempting to set max_fragment_shape with a value 109 | that is a bad length.""" 110 | shared_dims = [SharedDim("x", (0, 7), np.uint32)] 111 | creator = netcdf_engine.NetCDF4ArrayConverter( 112 | dim_order=("x"), shared_dims=shared_dims 113 | ) 114 | creator.add_attr_creator("y0", dtype=np.dtype("int32")) 115 | with pytest.raises(ValueError): 116 | creator.domain_creator.max_fragment_shape = (None, None) 117 | 118 | 119 | def test_array_converter_indexer_error(): 120 | """Tests value error when copying with an indexer of bad length.""" 121 | shared_dims = [SharedDim("x", (0, 7), np.uint32)] 122 | creator = 
netcdf_engine.NetCDF4ArrayConverter( 123 | dim_order=("x"), shared_dims=shared_dims 124 | ) 125 | creator.add_attr_creator("y0", dtype=np.dtype("int32")) 126 | with netCDF4.Dataset("example.nc", mode="w", diskless=True) as dataset: 127 | with pytest.raises(ValueError): 128 | creator.domain_creator.get_query_coordinates( 129 | netcdf_group=dataset, 130 | sparse=False, 131 | indexer=[slice(None), slice(None)], 132 | assigned_dim_values={"x": 0}, 133 | ) 134 | -------------------------------------------------------------------------------- /tests/core/test_fragment_writer.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | import tiledb 7 | from tiledb.cf.core._fragment_writer import FragmentWriter 8 | from tiledb.cf.core._shared_dim import SharedDim 9 | from tiledb.cf.core.source import NumpyData 10 | from tiledb.cf.testing import assert_dict_arrays_equal 11 | 12 | 13 | def test_fragment_writer_create_dense(): 14 | dims = ( 15 | SharedDim("dim1", (0, 100), np.uint32), 16 | SharedDim("dim2", (0, 100), np.uint32), 17 | ) 18 | attr_names = ["attr1", "attr2", "attr3", "attr4"] 19 | writer = FragmentWriter.create_dense(dims, attr_names, ((0, 10), (0, 100))) 20 | assert writer.is_dense_region 21 | assert writer.ndim == 2 22 | assert writer.nattr == 4 23 | 24 | 25 | def test_fragment_writer_create_sparse_coo(): 26 | dims = ( 27 | SharedDim("dim1", (0, 100), np.uint32), 28 | SharedDim("dim2", (0, 100), np.uint32), 29 | ) 30 | attr_names = ["attr1"] 31 | writer = FragmentWriter.create_sparse_coo(dims, attr_names, 8) 32 | assert not writer.is_dense_region 33 | assert writer.ndim == 2 34 | assert writer.nattr == 1 35 | 36 | 37 | def test_fragment_writer_create_sparse_row_major(): 38 | dims = ( 39 | SharedDim("dim1", (0, 100), np.uint32), 40 | SharedDim("dim2", (0, 100), np.uint32), 41 | ) 42 | attr_names = ["attr1", "attr2", "attr3"] 43 | writer = 
FragmentWriter.create_sparse_row_major(
        dims, attr_names, ((0, 10), (0, 100))
    )
    assert not writer.is_dense_region
    assert writer.ndim == 2
    assert writer.nattr == 3


def test_fragment_writer_remove_attr():
    """Removing an attribute by name decrements ``nattr``."""
    dims = (
        SharedDim("dim1", (0, 100), np.uint32),
        SharedDim("dim2", (0, 100), np.uint32),
    )
    attr_names = ["attr1", "attr2", "attr3", "attr4"]
    writer = FragmentWriter.create_dense(dims, attr_names, ((0, 10), (0, 100)))
    assert writer.is_dense_region
    assert writer.nattr == 4
    writer.remove_attr("attr3")
    assert writer.nattr == 3


def test_fragment_writer_dense_1D_full(tmpdir):
    """Writes a dense 1D fragment and checks data plus attribute metadata."""
    # Define data.
    attr_data = np.arange(-3, 5)

    # Create fragment writer. The region argument is None — presumably meaning
    # the full domain (TODO confirm against FragmentWriter.create_dense).
    writer = FragmentWriter.create_dense(
        (SharedDim("dim1", (0, 7), np.uint32),), [], None
    )

    # Check fragment writer.
    assert writer.ndim == 1
    assert writer.nattr == 0

    # Add attribute and check update.
    writer.add_attr("attr1")
    assert writer.nattr == 1

    # Add attribute data.
    writer.set_attr_data("attr1", NumpyData(attr_data, metadata={"key": "value"}))

    # Create base array.
    # NOTE(review): the directory name contains a stray comma; harmless for a
    # tmpdir name, but likely a typo (the sparse test below reuses it).
    uri = str(tmpdir.join("test_fragment_writer_dense,_1D_full"))
    schema = tiledb.ArraySchema(
        domain=tiledb.Domain(tiledb.Dim("dim1", domain=(0, 7), dtype=np.uint32)),
        attrs=[tiledb.Attr("attr1", dtype=np.int64)],
    )
    tiledb.Array.create(uri, schema)

    with tiledb.open(uri, "w") as array:
        writer.write(array)

    with tiledb.open(uri) as array:
        result = array[...]
        meta = dict(array.meta.items())

    # Attribute metadata is stored under the __tiledb_attr.<name>.<key> prefix.
    assert_dict_arrays_equal(result, {"attr1": attr_data})
    assert len(meta) == 1
    assert meta["__tiledb_attr.attr1.key"] == "value"


def test_fragment_writer_sparse_row_major_1D_full(tmpdir):
    """Writes a sparse row-major 1D fragment with dimension data and checks
    the data plus both attribute and dimension metadata."""
    # Define data.
    attr_data = np.arange(-3, 5, dtype=np.int64)
    dim_data = np.arange(8, dtype=np.uint32)

    # Create fragment writer.
    writer = FragmentWriter.create_sparse_row_major(
        (SharedDim("dim1", (0, 7), np.uint32),), [], (8,)
    )

    # Check fragment writer.
    assert writer.ndim == 1
    assert writer.nattr == 0

    # Add attribute and check update.
    writer.add_attr("attr1")
    assert writer.nattr == 1

    # Add attribute data and dimension data.
    writer.set_attr_data("attr1", NumpyData(attr_data, metadata={"key1": "attr_value"}))
    writer.set_dim_data("dim1", NumpyData(dim_data, metadata={"key2": "dim_value"}))

    # Create base array.
    uri = str(tmpdir.join("test_fragment_writer_dense,_1D_full"))
    schema = tiledb.ArraySchema(
        domain=tiledb.Domain(tiledb.Dim("dim1", domain=(0, 7), dtype=np.uint32)),
        attrs=[tiledb.Attr("attr1", dtype=np.int64)],
        sparse=True,
    )
    tiledb.Array.create(uri, schema)

    with tiledb.open(uri, "w") as array:
        writer.write(array)

    with tiledb.open(uri) as array:
        result = array[...]
        meta = dict(array.meta.items())

    # Third argument presumably relaxes the comparison (ordering?) — see
    # tiledb.cf.testing.assert_dict_arrays_equal (TODO confirm).
    assert_dict_arrays_equal(
        result, OrderedDict([("attr1", attr_data), ("dim1", dim_data)]), False
    )
    assert len(meta) == 2
    assert meta["__tiledb_attr.attr1.key1"] == "attr_value"
    assert meta["__tiledb_dim.dim1.key2"] == "dim_value"


def test_fragment_writer_set_attr_data_key_error():
    """Setting data for an attribute the writer does not have raises ``KeyError``."""
    dims = (
        SharedDim("dim1", (0, 100), np.uint32),
        SharedDim("dim2", (0, 100), np.uint32),
    )
    attr_names = ["attr1", "attr2"]
    writer = FragmentWriter.create_dense(dims, attr_names, ((0, 10), (0, 0)))
    with pytest.raises(KeyError):
        writer.set_attr_data("attr3", NumpyData(np.arange(11)))


def test_fragment_writer_set_attr_data_size_value_error():
    """Setting attribute data whose size does not match the target region
    raises ``ValueError``."""
    dims = (
        SharedDim("dim1", (0, 100), np.uint32),
        SharedDim("dim2", (0, 100), np.uint32),
    )
    attr_names = ["attr1", "attr2", "attr3"]
    writer = FragmentWriter.create_dense(dims, attr_names, ((0, 10), (0, 10)))
    with pytest.raises(ValueError):
        writer.set_attr_data("attr3", NumpyData(np.arange(11)))
The size of the full domain of the dimension.\n", 19 | "\n", 20 | "The default behavior of TileDB is to take the maximum value of all non-empty domains. If you have dimensions with mis-matched domain,\n", 21 | "the dimension will never return a size larger than the smallest domain.\n", 22 | "\n", 23 | "### Set-up Requirements\n", 24 | "This example requires `tiledb-cf` to be installed and uses the `tiledb`, `xarray`, and `numpy` libraries. " 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import tiledb\n", 34 | "import xarray as xr\n", 35 | "import numpy as np" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# Set names for the output generated by the example.\n", 45 | "output_dir = \"output/tiledb-xarray-partially-filled\"\n", 46 | "array_uri = f\"{output_dir}/example1\"\n", 47 | "group1_uri = f\"{output_dir}/group1\"\n", 48 | "group2_uri = f\"{output_dir}/group2\"" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# Reset output folder\n", 58 | "import os\n", 59 | "import shutil\n", 60 | "\n", 61 | "shutil.rmtree(output_dir, ignore_errors=True)\n", 62 | "os.mkdir(output_dir)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Example 1: Simple partially-filled 2D array" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "# Create array and write data.\n", 79 | "tiledb.Array.create(\n", 80 | " array_uri,\n", 81 | " tiledb.ArraySchema(\n", 82 | " domain=tiledb.Domain(\n", 83 | " tiledb.Dim(\"x\", domain=(0, 7), dtype=np.uint64),\n", 84 | " tiledb.Dim(\"y\", domain=(0, 7), dtype=np.uint64),\n", 85 | " ),\n", 86 | " attrs=[tiledb.Attr(\"z\", 
np.float64)],\n", 87 | " ),\n", 88 | ")\n", 89 | "with tiledb.open(array_uri, mode=\"w\") as array:\n", 90 | " array[0:4, 0:4] = np.reshape(np.arange(16), (4, 4))" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# Print non-empty domain and data.\n", 100 | "with tiledb.open(array_uri) as array:\n", 101 | " print(f\"Non-empty domain: {array.nonempty_domain()}\")\n", 102 | " print(f\"Data in non-empty domain:\\n {array.multi_index[:, :]['z']}\")\n", 103 | " print(f\"All data: \\n {array[:, :]['z']}\")" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# By default, xarray will only open the non-empty domain\n", 113 | "xr.open_dataset(array_uri, engine=\"tiledb\")" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Example 2: Fixed dimensions\n", 121 | "We can create a group that always reads some or all of the dimensions as full dimensions." 
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Set `x` to be a fixed-size dimension.\n", 131 | "tiledb.Group.create(group1_uri)\n", 132 | "with tiledb.Group(group1_uri, mode=\"w\") as group:\n", 133 | " group.add(uri=array_uri, name=\"z\")\n", 134 | " group.meta[\"__tiledb_array_fixed_dimensions.z\"] = \"x\"" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "xr.open_dataset(group1_uri, engine=\"tiledb\")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# Set `y` to be a fixed-size dimension.\n", 153 | "tiledb.Group.create(group2_uri)\n", 154 | "with tiledb.Group(group2_uri, mode=\"w\") as group:\n", 155 | " group.add(uri=array_uri, name=\"z\")\n", 156 | " group.meta[\"__tiledb_array_fixed_dimensions.z\"] = \"x;y\"" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "xr.open_dataset(group2_uri, engine=\"tiledb\")" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "interpreter": { 171 | "hash": "022b808d35d9188bc114e3dbdd31978ae285e77fefec36d9e39c13a87da8d5e5" 172 | }, 173 | "kernelspec": { 174 | "display_name": "Python 3.9.0 64-bit ('cf-3.9.0': pyenv)", 175 | "language": "python", 176 | "name": "python3" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.11.3" 189 | }, 190 | "orig_nbformat": 4 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } 195 | 
-------------------------------------------------------------------------------- /tiledb/cf/core/_metadata.py: -------------------------------------------------------------------------------- 1 | """Classes for additional group and metadata support useful for the TileDB-CF data 2 | model.""" 3 | 4 | from __future__ import annotations 5 | 6 | from collections.abc import MutableMapping 7 | from typing import Any, Iterator, Optional, TypeVar, Union 8 | 9 | import tiledb 10 | 11 | DType = TypeVar("DType", covariant=True) 12 | ATTR_METADATA_FLAG = "__tiledb_attr." 13 | DIM_METADATA_FLAG = "__tiledb_dim." 14 | 15 | 16 | class Metadata(MutableMapping): 17 | """Class for accessing Metadata using the standard MutableMapping API. 18 | 19 | Parameters 20 | ---------- 21 | metadata 22 | TileDB array metadata object. 23 | """ 24 | 25 | def __init__(self, metadata: tiledb.Metadata): 26 | self._metadata = metadata 27 | 28 | def __iter__(self) -> Iterator[str]: 29 | """Iterates over all metadata keys.""" 30 | for tiledb_key in self._metadata.keys(): 31 | key = self._from_tiledb_key(tiledb_key) 32 | if key is not None: 33 | yield key 34 | 35 | def __len__(self) -> int: 36 | """Returns the number of metadata items.""" 37 | return sum(1 for _ in self) 38 | 39 | def __getitem__(self, key: str) -> Any: 40 | """Implementation of [key] -> val (dict item retrieval). 41 | 42 | Parameters 43 | ---------- 44 | key 45 | Key to find value from. 46 | 47 | Returns 48 | ------- 49 | Any 50 | Value stored with provided key. 51 | """ 52 | return self._metadata[self._to_tiledb_key(key)] 53 | 54 | def __setitem__(self, key: str, value: Any): 55 | """Implementation of [key] <- val (dict item assignment). 56 | 57 | Paremeters 58 | ---------- 59 | key 60 | Key to set 61 | value 62 | Corresponding value 63 | """ 64 | self._metadata[self._to_tiledb_key(key)] = value 65 | 66 | def __delitem__(self, key): 67 | """Implementation of del [key] (dict item deletion). 
68 | 69 | Parameters 70 | ---------- 71 | key 72 | Key to remove. 73 | """ 74 | del self._metadata[self._to_tiledb_key(key)] 75 | 76 | def _to_tiledb_key(self, key: str) -> str: 77 | """Map an external user metadata key to an internal tiledb key.""" 78 | return key # pragma: no cover 79 | 80 | def _from_tiledb_key(self, tiledb_key: str) -> Optional[str]: 81 | """Map an internal tiledb key to an external user metadata key. 82 | 83 | Parameters 84 | ---------- 85 | tiledb_key 86 | Internal key to use for metadata. 87 | 88 | Returns 89 | ------- 90 | Optional[str] 91 | The external user metadata key corresponding to `tiledb_key`, 92 | or None if there is no such corresponding key. 93 | """ 94 | return tiledb_key # pragma: no cover 95 | 96 | 97 | class ArrayMetadata(Metadata): 98 | """Class for accessing array-related metadata from a TileDB metadata object. 99 | 100 | This class provides a way for accessing the TileDB array metadata that excludes 101 | attribute and dimension specific metadata. 102 | """ 103 | 104 | def _to_tiledb_key(self, key: str) -> str: 105 | if key.startswith(ATTR_METADATA_FLAG): 106 | raise KeyError("Key is reserved for attribute metadata.") 107 | if key.startswith(DIM_METADATA_FLAG): 108 | raise KeyError("Key is reserved for dimension metadata.") 109 | return key 110 | 111 | def _from_tiledb_key(self, tiledb_key: str) -> Optional[str]: 112 | if not ( 113 | tiledb_key.startswith(ATTR_METADATA_FLAG) 114 | or tiledb_key.startswith(DIM_METADATA_FLAG) 115 | ): 116 | return tiledb_key 117 | return None 118 | 119 | 120 | class AttrMetadata(Metadata): 121 | """Metadata wrapper for accessing attribute metadata. 122 | 123 | This class allows access to the metadata for an attribute stored in the metadata 124 | for a TileDB array. 125 | 126 | Parameters 127 | ---------- 128 | metadata 129 | TileDB array metadata for the array containing the desired attribute. 130 | attr 131 | Name or index of the arrary attribute being requested. 
132 |     """
133 | 
134 |     def __init__(self, metadata: tiledb.Metadata, attr: Union[str, int]):
135 |         super().__init__(metadata)
136 |         try:
137 |             attr_name = metadata.array.attr(attr).name
138 |         except tiledb.TileDBError as err:
139 |             raise KeyError(f"Attribute `{attr}` not found in array.") from err
140 |         self._key_prefix = ATTR_METADATA_FLAG + attr_name + "."
141 | 
142 |     def _to_tiledb_key(self, key: str) -> str:
143 |         return self._key_prefix + key
144 | 
145 |     def _from_tiledb_key(self, tiledb_key: str) -> Optional[str]:
146 |         if tiledb_key.startswith(self._key_prefix):
147 |             return tiledb_key[len(self._key_prefix) :]
148 |         return None
149 | 
150 | 
151 | class DimMetadata(Metadata):
152 |     """Metadata wrapper for accessing dimension metadata.
153 | 
154 |     This class allows access to the metadata for a dimension stored in the metadata
155 |     for a TileDB array.
156 | 
157 |     Parameters
158 |     ----------
159 |     metadata
160 |         TileDB array metadata for the array containing the desired dimension.
161 |     dim
162 |         Name or index of the array dimension being requested.
163 |     """
164 | 
165 |     def __init__(self, metadata: tiledb.Metadata, dim: Union[str, int]):
166 |         super().__init__(metadata)
167 |         try:
168 |             dim_name = metadata.array.dim(dim).name
169 |         except tiledb.TileDBError as err:
170 |             raise KeyError(f"Dimension `{dim}` not found in array.") from err
171 |         self._key_prefix = DIM_METADATA_FLAG + dim_name + "."
172 | 
173 |     def _to_tiledb_key(self, key: str) -> str:
174 |         return self._key_prefix + key
175 | 
176 |     def _from_tiledb_key(self, tiledb_key: str) -> Optional[str]:
177 |         if tiledb_key.startswith(self._key_prefix):
178 |             return tiledb_key[len(self._key_prefix) :]
179 |         return None
180 | 
-------------------------------------------------------------------------------- /documentation/xarray-engine.md: --------------------------------------------------------------------------------
1 | ---
2 | title: TileDB-CF Xarray Engine
3 | ---
4 | 
5 | 
6 | ## Reading from TileDB with Xarray
7 | 
8 | Xarray uses a plugin infrastructure that allows third-party developers to create their own backend engines for reading data into xarray. TileDB-CF contains one such backend. To use the backend, make sure `tiledb-cf` is installed in your current Python environment, and use the `tiledb` engine:
9 | 
10 | 
11 | ```python
12 | import xarray as xr
13 | 
14 | xr.open_dataset(tiledb_uri, engine="tiledb")
15 | ```
16 | 
17 | The TileDB engine can be used to open either a TileDB array or a TileDB group. See the requirements on the arrays below.
18 | 
19 | The backend engine will open the group or array as a dataset with TileDB dimensions mapping to dataset dimensions, TileDB attributes mapping to dataset variables/DataArrays, and TileDB metadata mapping to dataset attributes.
20 | 
21 | 
22 | For a TileDB array to be readable by xarray, the following must be satisfied:
23 | 
24 | * The array must be dense.
25 | * All dimensions on the array must be either signed or unsigned integers.
26 | * All dimensions must have a domain that starts at `0`.
27 | 
28 | For a TileDB group to be readable by xarray, the following must be satisfied:
29 | 
30 | * All arrays in the group satisfy the above requirements for the array to be readable.
31 | * Each attribute has a unique "variable name".
32 | 
33 | The TileDB backend engine can be used with the standard xarray keyword arguments.
It supports the additional TileDB-specific arguments: 34 | 35 | * `config`: An optional TileDB configuration object to use in arrays and groups. 36 | * `ctx`: An optional TileDB context object to use for all TileDB operations. 37 | * `timestamp`: An optional timestamp to open the TileDB array at (not supported on groups). 38 | 39 | 40 | ## Writing from Xarray to TileDB 41 | 42 | The xarray writer is stricter than the xarray backend engine (reader). While the reader will attempt to open arrays with multiple attributes, the xarray writer only creates arrays with one attribute per name. 43 | 44 | There are two sets of functions for writing to xarray: 45 | 46 | 1. Single dataset ingestion. 47 | 48 | * Functions used: `from_xarray` 49 | * Useful when copying an entire xarray dataset to a TileDB group in a single function call. 50 | * Creates the group and copies all data and metadata to the new group in a single function call. 51 | 52 | 2. Multi-dataset ingestion. 53 | 54 | * Main functions: `create_group_from_xarray` and `copy_data_from_xarray`. 55 | * Additional helper function: `copy_metadata_from_xarray`. 56 | * Useful when copying multiple xarray datasets to a single TileDB group. 57 | * Creates the group and copies data to the group in separate API calls. 58 | 59 | The xarray to TileDB writer will copy the dataset in the following way: 60 | 61 | * One group is created for the dataset. 62 | * Dataset "attributes" are copied to group level metadata. 63 | * Each xarray variable is copied to its own dense TileDB array with a single TileDB attribute. 64 | 65 | The array schema for an xarray variable is generated as follows: 66 | 67 | * TileDB array properties: 68 | 69 | - The TileDB array is dense. 70 | 71 | * TileDB Domain: 72 | 73 | - All dimensions have the same datatype determined by the `dim_dtype` encoding. 74 | 75 | - The dimension names in the TileDB array match the dimension names in the xarray variable. 
76 | 77 | - The dimension tiles are determined by the `tiles` encoding. 78 | 79 | - The domain of each dimension is set to `[0, max_size - 1]` where `max_size` is computed as follows: 80 | 81 | 1. Use the corresponding element of the `max_shape` encoding if provided. 82 | 83 | 2. If the `max_shape` encoding is not provided and the xarray dimension is "unlimited", use the largest possible size for this integer type. 84 | 85 | 3. If the `max_shape` encoding is not provided and the xarray dimension is not "unlimited", use the size of the xarray dimension. 86 | 87 | * TileDB Attribute: 88 | 89 | - The attribute datatype is the same as the variable datatype (after applying xarray encodings). 90 | 91 | - The attribute name is set using the following: 92 | 93 | 1. Use the name provided by `attr_name` encoding. 94 | 95 | 2. If the `attr_name` encoding is not provided and there is no dimension on this variable with the same name as the variable, use the name of the variable. 96 | 97 | 3. If the `attr_name` encoding is provided and there is a dimension on this variable with the same name as the variable, use the variable name appended with `_`. 98 | 99 | - The attribute filters are determined by the `filters` encoding. 100 | 101 | 102 | 103 | ### TileDB Encoding 104 | 105 | The writer takes a dictionary from dataset variable names to a dictionary of encodings for setting TileDB properties. The possible encoding keywords are provided in the table below. 106 | 107 | +------------------+-----------------------------------------------+--------------------+ 108 | | Encoding Keyword | Details | Type | 109 | +==================+===============================================+====================+ 110 | | `attr_name` | Name to use for the TileDB attribute. | str | 111 | +------------------+-----------------------------------------------+--------------------+ 112 | | `filters` | Filter list to apply to the TileDB attribute. 
| tiledb.FilterList  |
113 | +------------------+-----------------------------------------------+--------------------+
114 | | `tiles`          | Tile sizes to apply to the TileDB dimensions. | tuple of ints      |
115 | +------------------+-----------------------------------------------+--------------------+
116 | | `max_shape`      | Maximum possible size of the TileDB array.    | tuple of ints      |
117 | +------------------+-----------------------------------------------+--------------------+
118 | | `dim_dtype`      | Datatype to use for the TileDB dimensions.    | str or numpy.dtype |
119 | +------------------+-----------------------------------------------+--------------------+
120 | 
121 | 
122 | ### Region to Write
123 | 
124 | If creating TileDB arrays with either unlimited dimensions or with an encoded `max_shape` larger than the current size of the xarray variable, then the region to write the data to needs to be provided. This is input as a dictionary from dimension names to slices. The slice uses xarray/numpy conventions and will write to a region that does **not** include the upper bound of the slice.
125 | 
126 | 
127 | ### Creating Multiple Fragments
128 | 
129 | When copying data with either the `from_xarray` or `copy_data_from_xarray` functions, the copy routine will use Xarray chunks for separate writes - creating multiple fragments.
130 | 
-------------------------------------------------------------------------------- /tests/netcdf_engine/test_convert_multifragments.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | 
4 | import tiledb
5 | from tiledb.cf import NetCDF4ConverterEngine, open_group_array
6 | 
7 | netCDF4 = pytest.importorskip("netCDF4")
8 | 
9 | 
10 | class TestSimplyCopyChunks:
11 |     """Test converting a simple NetCDF in chunks.
12 | 13 | NetCDF File: 14 | 15 | dimensions: 16 | x (8) 17 | y (8) 18 | z (8) 19 | 20 | variables: 21 | f (x, y, z) = reshape([0, ..., 511], (8, 8, 8)) 22 | """ 23 | 24 | attr_data = np.reshape(np.arange(512), (8, 8, 8)) 25 | 26 | @pytest.fixture(scope="class") 27 | def netcdf_file(self, tmpdir_factory): 28 | """Returns the NetCDF file that will be used to test the conversion.""" 29 | filepath = tmpdir_factory.mktemp("input_file").join("simple_copy_chunks.nc") 30 | with netCDF4.Dataset(filepath, mode="w") as dataset: 31 | dataset.createDimension("x", 8) 32 | dataset.createDimension("y", 8) 33 | dataset.createDimension("z", 8) 34 | var = dataset.createVariable( 35 | varname="f", datatype=np.int64, dimensions=("x", "y", "z") 36 | ) 37 | var[:, :, :] = self.attr_data 38 | return filepath 39 | 40 | @pytest.mark.parametrize( 41 | "sparse,expected_result", ((False, attr_data), (True, np.arange(512))) 42 | ) 43 | def test_convert_chunks(self, netcdf_file, tmpdir, sparse, expected_result): 44 | """Test copying NetCDF file in chunks for a simple NetCDF file.""" 45 | uri = str(tmpdir.mkdir("output").join("simple_copy_chunks")) 46 | converter = NetCDF4ConverterEngine.from_file(netcdf_file) 47 | array_creator = converter.get_array_creator_by_attr("f") 48 | array_creator.sparse = sparse 49 | assert array_creator.domain_creator.max_fragment_shape == (None, None, None) 50 | array_creator.domain_creator.max_fragment_shape = (4, 8, 2) 51 | assert array_creator.domain_creator.max_fragment_shape == (4, 8, 2) 52 | converter.convert_to_group(uri) 53 | with tiledb.Group(uri) as group: 54 | with open_group_array(group, attr="f") as array: 55 | array_uri = array.uri 56 | result = array[...] 
57 | result = result["f"] if isinstance(result, dict) else result 58 | np.testing.assert_equal(result, expected_result) 59 | fragment_info = tiledb.FragmentInfoList(array_uri) 60 | assert len(fragment_info) == 8 61 | 62 | @pytest.mark.parametrize( 63 | "sparse,expected_result", 64 | ((False, np.reshape(np.arange(512), (8, 8, 8))), (True, np.arange(512))), 65 | ) 66 | def test_convert_chunks_with_injected( 67 | self, netcdf_file, tmpdir, sparse, expected_result 68 | ): 69 | """Test copying NetCDF file in chunks for a simple NetCDF file with externally 70 | provided dimension and attribute values.""" 71 | uri = str(tmpdir.mkdir("output").join("simple_copy_chunks")) 72 | converter = NetCDF4ConverterEngine.from_file(netcdf_file) 73 | converter.add_shared_dim("t", domain=(0, 3), dtype=np.uint64) 74 | array_creator = converter.get_array_creator_by_attr("f") 75 | array_creator.sparse = sparse 76 | array_creator.add_attr_creator(name="g", dtype=np.float64) 77 | array_creator.domain_creator.inject_dim_creator("t", 0) 78 | array_creator.domain_creator.max_fragment_shape = (1, 4, 8, 2) 79 | # Define data for extra variable 80 | g_data = np.reshape(np.random.random_sample((512)), (1, 8, 8, 8)) 81 | converter.convert_to_group( 82 | uri, 83 | assigned_dim_values={"t": 0}, 84 | assigned_attr_values={"g": g_data}, 85 | ) 86 | with tiledb.Group(uri) as group: 87 | with open_group_array(group, array="array0") as array: 88 | array_uri = array.uri 89 | result = array[0, :, :, :] 90 | f_result = result["f"] 91 | np.testing.assert_equal(f_result, expected_result) 92 | g_result = np.reshape(result["g"], (1, 8, 8, 8)) 93 | np.testing.assert_equal(g_data, g_result) 94 | fragment_info = tiledb.FragmentInfoList(array_uri) 95 | assert len(fragment_info) == 8 96 | 97 | 98 | class TestCoordinateCopyChunks: 99 | """Test converting a simple NetCDF in chunks. 
100 | 
101 |     NetCDF File:
102 | 
103 |     dimensions:
104 |         x (8)
105 |         y (8)
106 | 
107 |     variables:
108 |         x (x) = [-4, -3, ..., 3]
109 |         y (y) = [10, 20, ..., 80]
110 |         f (x, y) = [[0, 1, ...],...,[...,62,63]]
111 |     """
112 | 
113 |     x_data = np.arange(-4, 4)
114 |     y_data = np.arange(10, 81, 10)
115 |     attr_data = np.reshape(np.arange(64), (8, 8))
116 | 
117 |     @pytest.fixture(scope="class")
118 |     def netcdf_file(self, tmpdir_factory):
119 |         """Returns the NetCDF file that will be used to test the conversion."""
120 |         filepath = tmpdir_factory.mktemp("input_file").join("simple_copy_chunks.nc")
121 |         with netCDF4.Dataset(filepath, mode="w") as dataset:
122 |             dataset.createDimension("x", 8)
123 |             dataset.createDimension("y", 8)
124 |             var = dataset.createVariable(
125 |                 varname="f", datatype=np.int64, dimensions=("x", "y")
126 |             )
127 |             var[:, :] = self.attr_data
128 |             var = dataset.createVariable(
129 |                 varname="x", datatype=np.int64, dimensions=("x")
130 |             )
131 |             var[:] = self.x_data
132 |             var = dataset.createVariable(
133 |                 varname="y", datatype=np.int64, dimensions=("y")
134 |             )
135 |             var[:] = self.y_data
136 |         return filepath
137 | 
138 |     def test_convert_chunks(self, netcdf_file, tmpdir):
139 |         """Test copying NetCDF file in chunks for a NetCDF to TileDB conversion that
140 |         maps NetCDF coordinates to dimensions."""
141 |         uri = str(tmpdir.mkdir("output").join("simple_copy_chunks"))
142 |         converter = NetCDF4ConverterEngine.from_file(netcdf_file, coords_to_dims=True)
143 |         converter.get_shared_dim("x").domain = (-4, 3)
144 |         converter.get_shared_dim("y").domain = (10, 80)
145 |         array_creator = converter.get_array_creator_by_attr("f")
146 |         array_creator.domain_creator.max_fragment_shape = (4, 4)
147 |         converter.convert_to_group(uri)
148 |         with tiledb.Group(uri) as group:
149 |             with open_group_array(group, attr="f") as array:
150 |                 array_uri = array.uri
151 |                 result = array[...]
152 |         for x_value, y_value, f_value in zip(result["x"], result["y"], result["f"]):
153 |             ix = np.argwhere(self.x_data == x_value)
154 |             assert len(ix) == 1
155 |             assert 0 <= ix[0] <= 7
156 |             iy = np.argwhere(self.y_data == y_value)
157 |             assert len(iy) == 1
158 |             assert 0 <= iy[0] <= 7
159 |             f_expected = self.attr_data[ix[0], iy[0]]
160 |             assert f_value == f_expected
161 |         fragment_info = tiledb.FragmentInfoList(array_uri)
162 |         assert len(fragment_info) == 4
163 | 
-------------------------------------------------------------------------------- /documentation/contributing.md: --------------------------------------------------------------------------------
1 | ---
2 | title: Contributing to TileDB-CF-Py
3 | ---
4 | 
5 | Thank you for your interest in contributing to TileDB-CF-Py. The following notes are intended to help you file issues, bug reports, or contribute code to this open source project.
6 | 
7 | ## Contributing Checklist
8 | 
9 | * Reporting a bug? Please read [how to file a bug report](#reporting-a-bug) section to make sure sufficient information is included.
10 | 
11 | * Contributing code? You rock! Be sure to [review the contributor section](#contributing-code) for helpful tips on the tools we use to build this project, format code, and issue pull requests (PR)'s.
12 | 
13 | Note: All participants in TileDB spaces are expected to adhere to a high standard of professionalism in all interactions. See the [code of conduct](code-of-conduct.md) for more information.
14 | 
15 | ## Reporting a Bug
16 | 
17 | A useful bug report filed as a GitHub issue provides information about how to reproduce the error.
18 | 
19 | 1. Before opening a new [GitHub issue](https://github.com/TileDB-Inc/TileDB-CF-Py/issues) try searching the existing issues to see if someone else has already noticed the same problem.
20 | 
21 | 2. When filing a bug report, provide where possible:
22 | 
23 |     * The version of TileDB-CF-Py or if a `dev` version, the specific commit that triggers the error.
24 | * The full error message, including the backtrace (if possible). 25 | * A minimal working example, i.e. the smallest chunk of code that triggers the error. Ideally, this should be code that can be a small reduced python file. If the code to reproduce is somewhat long, consider putting it in a [gist](https://gist.github.com). 26 | 27 | 3. When pasting code blocks or output, put triple backquotes (\`\`\`) around the text so GitHub will format it nicely. Code statements should be surrounded by single backquotes (\`). See [GitHub's guide on Markdown](https://guides.github.com/features/mastering-markdown) for more formatting tricks. 28 | 29 | ## Contributing Code 30 | 31 | *By contributing code to TileDB-CF-Py, you are agreeing to release it under the [MIT License](https://github.com/TileDB-Inc/TileDB/tree/dev/LICENSE).* 32 | 33 | ### Quickstart Workflow 34 | 35 | [From a fork of TileDB-CF-Py](https://help.github.com/articles/fork-a-repo/) 36 | 37 | ```bash 38 | git clone https://github.com/username/TileDB-CF-Py 39 | pip install -e '.[parallel]' 40 | git checkout -b / 41 | # ... code changes ... 42 | ./tools/lint.sh # run linters 43 | git commit -a -m "descriptive commit message" 44 | git push --set-upstream origin / 45 | ``` 46 | 47 | [Issue a PR from your updated TileDB-CF-Py fork](https://help.github.com/articles/creating-a-pull-request-from-a-fork/) 48 | 49 | Branch conventions: 50 | 51 | * `dev` is the development branch of TileDB-CF-Py, all PR's are merged into `dev`. 52 | * `release-x.y.z` are major / bugfix release branches. 53 | 54 | ### Building Locally for Development 55 | 56 | This project uses setuptools for its build system, and can be built locally using pip. It is recommended you set-up a Python virtual environment with your preferred method before installing. Once the virtual environment is activated, install `tiledb.cf` as 'editable' using pip: 57 | 58 | ```bash 59 | pip install -e . 
60 | ``` 61 | 62 | The following tools are used for testing, linting, and formatting. You may want to install them either in the local virtual environment or as command line tools for you system: 63 | 64 | * black 65 | * flake8 66 | * mypy 67 | * pytest (with pytest-cov) 68 | 69 | 70 | ### Formatting, Style, and Linting 71 | 72 | * 4 spaces per indentation level not tabs 73 | * class names use `CamelCase` 74 | * member functions, variables use `snake_case` 75 | * private module or class member use a leading underscore `_local_variable` 76 | * comments are good, the project uses Google-style docstrings with type hints 77 | * format code using [black](https://pypi.org/project/black/) and [isort](https://pypi.org/project/isort/) 78 | * lint code using [flake8](https://pypi.org/project/flake8/) and [mypy](https://pypi.org/project/mypy/) 79 | 80 | It is highly recommended to run formatting and linting tools before every commit. This can be automated by activating the pre-commit hook `tools/hooks/pre-commit.sh`. To do this symlink or copy `tools/hooks/pre-commit.sh` to `.git/hooks/pre-commit` in the local directory. Note that the pre-commit hook may fail due to unstaged changes. You may wish to stash these changes before committing. This can be done as follows: 81 | 82 | ```bash 83 | git add 84 | git stash --keep-index 85 | git commit 86 | git stash pop 87 | ``` 88 | 89 | ### Testing 90 | 91 | The testing for this project uses pytest and GitHub workflows for testing. The test suite will be run on GitHub when you submit your pull request. 92 | 93 | ### API Documentation 94 | 95 | To build the API documentation do the following from this projects root directory: 96 | 97 | 1. Install required packages: 98 | ```bash 99 | python3 -m pip install tiledb-cf[docs] 100 | ``` 101 | 2. Make the HTML document: 102 | ```bash 103 | make -C docs/ html 104 | ``` 105 | 3. Open [docs/_build/html/index.html](./docs/_build/html/index.html) in a web browser of your choice. 
106 | 107 | 108 | ### Pull Requests 109 | 110 | * `dev` is the development branch, all PR’s should be rebased on top of the latest `dev` commit. 111 | 112 | * Commit changes to a local branch. The convention is to use your initials to identify branches. Branch names should be identifiable and reflect the feature or bug that they want to address / fix. This helps in deleting old branches later. 113 | 114 | * When ready to submit a PR, `git rebase` the branch on top of the latest `dev` commit. Be sure to squash / cleanup the commit history so that the PR preferably one, or a couple commits at most. All commits will be squashed into a single commit upon merging. 115 | 116 | * Run the formatting (`isort`, `black`) and linting tools (`flake8`, `mypy`) before submitting a final PR. Make sure that your contribution generally follows the format and naming conventions used by surrounding code. 117 | 118 | * Update the HISTORY with any changes/adds/removes to user-facing API or system behavior. Make sure to note any non-backward compatible changes as a breaking change. 119 | 120 | * Submit a PR, writing a descriptive message. If a PR closes an open issue, reference the issue in the PR message (e.g. If an issue closes issue number 10, you would write `closes #10`) 121 | 122 | * Make sure CI (continuous integration) is passing for your PR. 
123 | 124 | ### Resources 125 | 126 | * TileDB-CF-Py 127 | * [Issues](https://github.com/TileDB-Inc/TileDB-CF-Py/issues) 128 | * [Documentation](https://docs.tiledb.com/geospatial) 129 | 130 | * TileDB 131 | * [Homepage](https://tiledb.com) 132 | * [Documentation](https://docs.tiledb.com/main/) 133 | * [Forum](https://forum.tiledb.io/) 134 | * [Organization](https://github.com/TileDB-Inc/) 135 | 136 | * Github / Git 137 | * [Git cheatsheet](https://services.github.com/on-demand/downloads/github-git-cheat-sheet/) 138 | * [Github Documentation](https://help.github.com/) 139 | * [Forking a Repo](https://help.github.com/articles/fork-a-repo/) 140 | * [More Learning Resources](https://help.github.com/articles/git-and-github-learning-resources/) 141 | -------------------------------------------------------------------------------- /tests/core/test_write_array.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | 5 | import tiledb 6 | from tiledb.cf.core._array_creator import ArrayCreator 7 | from tiledb.cf.core._shared_dim import SharedDim 8 | from tiledb.cf.testing import assert_dict_arrays_equal 9 | 10 | 11 | def test_write_array_dense_1D_full(tmpdir): 12 | uri = str(tmpdir.mkdir("output").join("dense_1D_full")) 13 | attr_data = np.arange(-3, 5) 14 | 15 | creator = ArrayCreator( 16 | dim_order=("dim1",), 17 | shared_dims=[SharedDim("dim1", (0, 7), np.uint32)], 18 | ) 19 | creator.add_dense_fragment_writer() 20 | creator.add_attr_creator("attr1", dtype=np.int64) 21 | creator["attr1"].set_writer_data(attr_data) 22 | 23 | creator.write(uri) 24 | 25 | with tiledb.open(uri) as array: 26 | result = array[...] 
def test_write_array_sparse_1D_dense_region_full(tmpdir):
    """Write a full dense region into a 1D sparse array and read it back."""
    uri = str(tmpdir.mkdir("output").join("sparse_1D_dense_full"))
    expected_values = np.arange(-3, 5)

    array_creator = ArrayCreator(
        dim_order=("dim1",),
        shared_dims=[SharedDim("dim1", (0, 7), np.uint32)],
        sparse=True,
    )
    array_creator.add_attr_creator("attr1", dtype=np.int64)
    array_creator.add_dense_fragment_writer()
    array_creator["attr1"].set_writer_data(expected_values)

    array_creator.write(uri)

    with tiledb.open(uri) as tiledb_array:
        actual = tiledb_array[...]

    # A dense fragment written into a sparse array materializes every
    # coordinate of the region, so the full dimension range is expected.
    expected = OrderedDict(
        [
            ("dim1", np.arange(8, dtype=np.uint32)),
            ("attr1", expected_values),
        ]
    )
    assert_dict_arrays_equal(actual, expected)
dim_order=("dim1",), 88 | shared_dims=[SharedDim("dim1", (0, 7), np.uint32)], 89 | sparse=True, 90 | ) 91 | creator.add_attr_creator("attr1", dtype=np.int64) 92 | creator.add_sparse_fragment_writer(shape=(4,), form="row-major") 93 | creator["attr1"].set_writer_data(attr_data) 94 | creator.domain_creator["dim1"].set_writer_data(dim_data) 95 | 96 | creator.write(uri) 97 | 98 | with tiledb.open(uri) as array: 99 | result = array.multi_index[:] 100 | 101 | expected = OrderedDict() 102 | expected["dim1"] = dim_data 103 | expected["attr1"] = attr_data 104 | assert_dict_arrays_equal(result, expected, False) 105 | 106 | 107 | def test_write_array_dense_2D_full(tmpdir): 108 | uri = str(tmpdir.mkdir("output").join("dense_2D_full")) 109 | attr_data = np.resize(np.arange(-3, 28), (8, 4)) 110 | 111 | creator = ArrayCreator( 112 | dim_order=("dim1", "dim2"), 113 | shared_dims=[ 114 | SharedDim("dim1", (0, 7), np.uint32), 115 | SharedDim("dim2", (0, 3), np.uint32), 116 | ], 117 | ) 118 | creator.add_attr_creator("attr1", dtype=np.int64) 119 | creator.add_dense_fragment_writer() 120 | creator["attr1"].set_writer_data(attr_data) 121 | 122 | creator.write(uri) 123 | 124 | with tiledb.open(uri) as array: 125 | result = array[...] 126 | 127 | assert_dict_arrays_equal(result, {"attr1": attr_data}) 128 | 129 | 130 | def test_write_array_sparse_2D_dense_region_full(tmpdir): 131 | uri = str(tmpdir.mkdir("output").join("sparse_2D_dense_full")) 132 | attr_data = np.resize(np.arange(-3, 28), (8, 4)) 133 | 134 | creator = ArrayCreator( 135 | dim_order=("dim1", "dim2"), 136 | shared_dims=[ 137 | SharedDim("dim1", (0, 7), np.uint32), 138 | SharedDim("dim2", (0, 3), np.uint32), 139 | ], 140 | sparse=True, 141 | ) 142 | creator.add_attr_creator("attr1", dtype=np.int64) 143 | creator.add_dense_fragment_writer() 144 | creator["attr1"].set_writer_data(attr_data) 145 | 146 | creator.write(uri) 147 | 148 | with tiledb.open(uri) as array: 149 | result = array[...] 
def test_write_array_sparse_2D_sparse_coo_region(tmpdir):
    """Write COO-form sparse data into a 2D sparse array and read it back."""
    # Fixed: output name was copy-pasted from the 1D test
    # ("sparse_1D_sparse_region"), which made the array name misleading.
    uri = str(tmpdir.mkdir("output").join("sparse_2D_sparse_coo_region"))
    dim1_data = np.array([7, 1, 5, 3], dtype=np.uint32)
    dim2_data = np.array([0, 1, 1, 0], dtype=np.uint32)
    attr_data = np.array([-3, 0, 100, -100], dtype=np.int64)

    creator = ArrayCreator(
        dim_order=("dim1", "dim2"),
        shared_dims=[
            SharedDim("dim1", (0, 7), np.uint32),
            SharedDim("dim2", (0, 3), np.uint32),
        ],
        sparse=True,
    )
    creator.add_attr_creator("attr1", dtype=np.int64)
    creator.add_sparse_fragment_writer(size=4)
    creator["attr1"].set_writer_data(attr_data)
    creator.domain_creator["dim1"].set_writer_data(dim1_data)
    creator.domain_creator["dim2"].set_writer_data(dim2_data)

    creator.write(uri)

    with tiledb.open(uri) as array:
        result = array.multi_index[:]

    expected = OrderedDict()
    expected["dim1"] = dim1_data
    expected["dim2"] = dim2_data
    expected["attr1"] = attr_data
    assert_dict_arrays_equal(result, expected, False)


def test_write_array_sparse_2D_sparse_row_major_region(tmpdir):
    """Write row-major-form sparse data into a 2D sparse array and read it
    back.
    """
    # Fixed: output name was copy-pasted from the 1D test.
    uri = str(tmpdir.mkdir("output").join("sparse_2D_sparse_row_major_region"))
    dim1_data = np.array([7, 1, 5], dtype=np.uint32)
    dim2_data = np.array([0, 3, 1, 2], dtype=np.uint32)
    attr_data = np.arange(-6, 6, dtype=np.int64)

    creator = ArrayCreator(
        dim_order=("dim1", "dim2"),
        shared_dims=[
            SharedDim("dim1", (0, 7), np.uint32),
            SharedDim("dim2", (0, 3), np.uint32),
        ],
        sparse=True,
    )
    creator.add_attr_creator("attr1", dtype=np.int64)
    creator.add_sparse_fragment_writer(shape=(3, 4), form="row-major")
    creator["attr1"].set_writer_data(attr_data)
    creator.domain_creator["dim1"].set_writer_data(dim1_data)
    creator.domain_creator["dim2"].set_writer_data(dim2_data)

    creator.write(uri)

    with tiledb.open(uri) as array:
        result = array.multi_index[:]

    # Row-major form broadcasts the per-row and per-column coordinates over
    # the (3, 4) fragment shape. (Removed stray debug ``print``.)
    expected = OrderedDict()
    expected["dim1"] = np.repeat(dim1_data, 4)
    expected["dim2"] = np.tile(dim2_data, 3)
    expected["attr1"] = attr_data
    assert_dict_arrays_equal(result, expected, False)
| str 43 | ] = "TileDB backend for xarray for opening TileDB arrays and groups" 44 | url: ClassVar[str] = "https://github.com/TileDB-Inc/TileDB-CF-Py" 45 | 46 | def open_dataset( 47 | self, 48 | filename_or_obj, 49 | *, 50 | config=None, 51 | ctx=None, 52 | timestamp=None, 53 | use_deprecated_engine=None, 54 | key=None, 55 | encode_fill=None, 56 | coord_dims=None, 57 | open_full_domain=None, 58 | mask_and_scale=True, 59 | decode_times=True, 60 | concat_characters=True, 61 | decode_coords=True, 62 | drop_variables: str | Iterable[str] | None = None, 63 | use_cftime=None, 64 | decode_timedelta=None, 65 | ) -> Dataset: 66 | """ 67 | Open a TileDB group or array as an xarray dataset. 68 | 69 | Parameters 70 | ---------- 71 | filename_or_obj 72 | TileDB URI for the group or array to open in xarray. 73 | config 74 | TileDB config object to pass to TileDB objects. 75 | ctx 76 | TileDB context to use for TileDB operations. 77 | timestamp 78 | Timestamp to open the TileDB array at. Not valid for groups. 79 | key 80 | [Deprecated] Encryption key to use for the backend array. 81 | encode_fill 82 | [Deprecated] Encode the TileDB fill value. 83 | coord_dims 84 | [Deprecated] List of dimensions to convert to coordinates. 85 | open_full_domain 86 | [Deprecated] Open the full TileDB domain instead of the non-empty domain. 87 | mask_and_scale 88 | xarray decoder that masks fill value and applies float-scale filter using 89 | variable metadata. 90 | decode_times 91 | xarray decoder that converts variables with NetCDF CF-Convention time 92 | metadata to a numpy.datetime64 datatype. 93 | concat_characters 94 | xarray decoder not supported by TileDB. 95 | decode_coords 96 | xarray decoder that controls which variables are set as coordinate 97 | variables. 98 | drop_variables 99 | A variable or list of variables to exclude from being opened from the 100 | dataset. 101 | use_cftime 102 | xarray decoder option. Uses cftime for datetime decoding. 
103 | decode_timedelta 104 | xarray decoder that converts variables with time units to a 105 | numpy.timedelta64 datatype. 106 | """ 107 | 108 | deprecated_kwargs = { 109 | "key": key, 110 | "encode_fill": encode_fill, 111 | "open_full_domain": open_full_domain, 112 | } 113 | 114 | # If deprecated keyword aguments were set, then switch to the deprecated engine. 115 | if use_deprecated_engine is None: 116 | 117 | def check_use_deprecated(key_name, key_value): 118 | if key_value is not None: 119 | warnings.warn( 120 | f"Deprecated keyword '{key_name}' provided; deprecated engine " 121 | f"is enabled.", 122 | DeprecationWarning, 123 | stacklevel=1, 124 | ) 125 | return True 126 | 127 | use_deprecated_engine = any( 128 | check_use_deprecated(key, val) 129 | for (key, val) in deprecated_kwargs.items() 130 | ) 131 | 132 | # Use the deprecated xarray engine for opening the array. 133 | if use_deprecated_engine: 134 | warnings.warn( 135 | "Using deprecated TileDB-Xarray plugin", 136 | DeprecationWarning, 137 | stacklevel=1, 138 | ) 139 | 140 | # Create the deprecated store. 141 | encode_fill = False if encode_fill is None else encode_fill 142 | open_full_domain = False if open_full_domain is None else open_full_domain 143 | datastore = TileDBDataStore( 144 | uri=filename_or_obj, 145 | key=key, 146 | timestamp=timestamp, 147 | ctx=ctx, 148 | encode_fill=encode_fill, 149 | open_full_domain=open_full_domain, 150 | coord_dims=coord_dims, 151 | ) 152 | 153 | # Use xarray indirection to open dataset defined in a plugin. 
154 | store_entrypoint = StoreBackendEntrypoint() 155 | with close_on_error(datastore): 156 | dataset = store_entrypoint.open_dataset( 157 | datastore, 158 | mask_and_scale=mask_and_scale, 159 | decode_times=decode_times, 160 | concat_characters=concat_characters, 161 | decode_coords=decode_coords, 162 | drop_variables=drop_variables, 163 | use_cftime=use_cftime, 164 | decode_timedelta=decode_timedelta, 165 | ) 166 | return dataset 167 | 168 | # Using new engine: warn if any deprecated keyword arguments were set. 169 | for arg_name, arg_value in deprecated_kwargs.items(): 170 | if arg_value is not None: 171 | warnings.warn( 172 | f"Skipping deprecated keyword '{arg_name}' used when " 173 | f"`use_deprecated_engine=False`.", 174 | DeprecationWarning, 175 | stacklevel=1, 176 | ) 177 | 178 | # Create the TileDB backend store. 179 | datastore = TileDBXarrayStore( 180 | filename_or_obj, config=config, ctx=ctx, timestamp=timestamp 181 | ) 182 | 183 | # Use xarray indirection to open dataset defined in a plugin. 
184 | store_entrypoint = StoreBackendEntrypoint() 185 | with close_on_error(datastore): 186 | dataset = store_entrypoint.open_dataset( 187 | datastore, 188 | mask_and_scale=mask_and_scale, 189 | decode_times=decode_times, 190 | concat_characters=concat_characters, 191 | decode_coords=decode_coords, 192 | drop_variables=drop_variables, 193 | use_cftime=use_cftime, 194 | decode_timedelta=decode_timedelta, 195 | ) 196 | return dataset 197 | 198 | def guess_can_open(self, filename_or_obj) -> bool: 199 | """Check for datasets that can be opened with this backend.""" 200 | if isinstance(filename_or_obj, (str, os.PathLike)): 201 | _, ext = os.path.splitext(filename_or_obj) 202 | if ext in {".tiledb", ".tdb"}: 203 | return True 204 | try: 205 | return tiledb.object_type(filename_or_obj) in {"array", "group"} 206 | except tiledb.TileDBError: 207 | return False 208 | 209 | 210 | BACKEND_ENTRYPOINTS["tiledb"] = ("tiledb", TileDBXarrayBackendEntrypoint) 211 | -------------------------------------------------------------------------------- /tiledb/cf/netcdf_engine/_utils.py: -------------------------------------------------------------------------------- 1 | """Class for helper functions for NetCDF to TileDB conversion.""" 2 | 3 | import time 4 | import warnings 5 | from contextlib import contextmanager 6 | from pathlib import Path 7 | from typing import Any, Optional, Sequence, Tuple, Union 8 | 9 | import netCDF4 10 | import numpy as np 11 | 12 | import tiledb 13 | 14 | from .._utils import safe_set_metadata 15 | 16 | _DEFAULT_INDEX_DTYPE = np.dtype("uint64") 17 | COORDINATE_SUFFIX = ".data" 18 | 19 | 20 | def copy_group_metadata(netcdf_group: netCDF4.Group, meta: tiledb.libtiledb.Metadata): 21 | """Copy all NetCDF group attributs to a the metadata in a TileDB array.""" 22 | for key in netcdf_group.ncattrs(): 23 | value = netcdf_group.getncattr(key) 24 | if key == "history": 25 | value = f"{value} - TileDB array created on {time.ctime(time.time())}" 26 | 
def get_netcdf_metadata(
    netcdf_item, key: str, default: Any = None, is_number: bool = False
) -> Any:
    """Returns a NetCDF attribute value from a key if it exists and the default
    value otherwise.

    If ``is_number=True``, the result is only returned if it is numeric. If the
    key exists but is not numeric, a warning is raised and the default is
    returned. If the key exists and is an array of length 1, the scalar value
    is returned.

    Parameters
    ----------
    netcdf_item
        NetCDF item (dataset, group, or variable) to read the attribute from.
    key
        NetCDF attribute name to return.
    default
        Default value to return if the attribute is not found.
    is_number
        If ``True``, the result is only returned if it is a number.

    Returns
    -------
    Any
        The NetCDF attribute value, if found. Otherwise, the default value.
    """
    if key not in netcdf_item.ncattrs():
        return default
    value = netcdf_item.getncattr(key)
    if is_number:
        # Use ``np.asarray`` so plain Python scalars (which have no ``dtype``
        # attribute) and strings are handled uniformly; previously a plain
        # Python int/float attribute raised AttributeError on ``value.dtype``.
        array_value = np.asarray(value)
        if not np.issubdtype(array_value.dtype, np.number) or array_value.size != 1:
            # No need for a ``catch_warnings`` context just to emit a warning.
            warnings.warn(
                f"Attribute '{key}' has value='{value}' that is not a number. "
                f"Using default {key}={default} instead.",
                stacklevel=3,
            )
            return default
        if not np.isscalar(value):
            value = value.item()
    return value
87 | """ 88 | input_dtype = np.dtype(variable.dtype) 89 | if not np.issubdtype(input_dtype, np.number): 90 | raise ValueError( 91 | f"Unpacking only support NetCDF variables with integer or floating-point " 92 | f"data. Input variable has datatype {input_dtype}." 93 | ) 94 | test = np.array(0, dtype=input_dtype) 95 | scale_factor = get_netcdf_metadata(variable, "scale_factor", is_number=True) 96 | add_offset = get_netcdf_metadata(variable, "add_offset", is_number=True) 97 | if scale_factor is not None: 98 | test = scale_factor * test 99 | if add_offset is not None: 100 | test = test + add_offset 101 | return test.dtype 102 | 103 | 104 | def get_variable_values( 105 | variable: netCDF4.Variable, 106 | indexer: Union[slice, Sequence[slice]], 107 | fill: Optional[Union[int, float, str]], 108 | unpack: bool, 109 | ) -> np.ndarray: 110 | """Returns the values for a NetCDF variable at the requested indices. 111 | 112 | Parameters 113 | ---------- 114 | variable 115 | NetCDF variable to get values from. 116 | indexer 117 | Sequence of slices used to index the NetCDF variable. 118 | fill 119 | If not ``None``, the fill value to use for the output data. 120 | unpack 121 | If ``True``, unpack the variable if it contains a ``scale_factor`` or 122 | ``add_offset``. 123 | 124 | Returns 125 | ------- 126 | np.ndarray 127 | The data from the NetCDF variable. 
def get_variable_chunks(
    variable: "netCDF4.Variable", unlimited_dim_size
) -> Optional[Tuple[int, ...]]:
    """
    Returns the chunks from a NetCDF variable if chunked and ``None`` otherwise.

    For unlimited dimensions the chunk size is capped at
    ``unlimited_dim_size`` (or at the dimension's current size when
    ``unlimited_dim_size`` is ``None``).

    Parameters
    ----------
    variable
        The variable to get chunks from.
    unlimited_dim_size
        The size to use for unlimited dimensions.

    Returns
    -------
    Tuple[int, ...], optional
        Chunks from the NetCDF variable if it is chunked and ``None`` otherwise.
    """
    raw_chunks = variable.chunking()
    if raw_chunks is None or raw_chunks == "contiguous":
        return None
    capped_chunks = []
    for chunk, dim in zip(raw_chunks, variable.get_dims()):
        if dim.isunlimited():
            cap = dim.size if unlimited_dim_size is None else unlimited_dim_size
            capped_chunks.append(min(chunk, cap))
        else:
            capped_chunks.append(chunk)
    return tuple(capped_chunks)
182 | 183 | If both an input file and group are provided, this function will prioritize 184 | opening from the group. 185 | 186 | Parameters 187 | ---------- 188 | group 189 | A NetCDF group to read from. 190 | input_file 191 | A NetCDF file to read from. 192 | group_path 193 | The path to the NetCDF group to read from in a NetCDF file. Use ``'/'`` to 194 | specify the root group. 195 | """ 196 | if group is not None: 197 | if not isinstance(group, (netCDF4.Dataset, netCDF4.Group)): 198 | raise TypeError( 199 | f"Invalid input: group={group} of type {type(group)} is not a netCDF " 200 | f"Group or or Dataset." 201 | ) 202 | yield group 203 | else: 204 | if input_file is None: 205 | raise ValueError( 206 | "An input file must be provided; no default input file was set." 207 | ) 208 | if group_path is None: 209 | raise ValueError( 210 | "A group path must be provided; no default group path was set. Use " 211 | "``'/'`` for the root group." 212 | ) 213 | root_group = netCDF4.Dataset(input_file) 214 | root_group.set_auto_maskandscale(False) 215 | try: 216 | netcdf_group = root_group 217 | if group_path != "/": 218 | for child_group_name in group_path.strip("/").split("/"): 219 | netcdf_group = netcdf_group.groups[child_group_name] 220 | yield netcdf_group 221 | finally: 222 | root_group.close() 223 | -------------------------------------------------------------------------------- /documentation/tiledb-cf-spec.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: TileDB-CF Dataspace Specification 3 | --- 4 | 5 | ::: {.callout-warning} 6 | The current dataspace specification is not stable. Backwards compatibility is not guaranteed for specification less than 1.0.0. 7 | ::: 8 | 9 | ## Current TileDB-CF Dataspace Specification 10 | 11 | * The current TileDB-CF format version number is **0.3.0**. 
12 | 13 | ### TileDB-CF Dataspace 0.3.0 14 | 15 | A TileDB CF dataspace is a TileDB group with arrays, attributes, and dimensions that satisfy the following rules. 16 | 17 | #### Terminology 18 | 19 | * **Collection of dimensions**: A set of TileDB dimensions with the same name, data type, and domain. 20 | 21 | #### CF Dataspace 22 | 23 | **Requirements for Dimensions** 24 | 25 | 1. All dimensions that share a name must belong to the same collection (they must have the same domain and data type). 26 | 27 | **Requirements for Metadata** 28 | 29 | 1. Attribute metadata is stored in the same array as the attribute. The metadata key must use the prefix `__tiledb_attr.{attr_name}.` where `{attr_name}` is the full name of the attribute. 30 | 2. Dimension metadata is stored in the same array as the dimension. The metadata key must use the prefix `__tiledb_dim.{dim_name}.` where `{dim_name}` is the full name of the dimension. 31 | 32 | ### Simple CF Dataspace 33 | 34 | A simple CF dataspace is a direct implementation of the NetCDF data model in TileDB. It follows the same rules as a CF dataspace along with the following requirements: 35 | 36 | **Additional Requirements for Dimensions** 37 | 38 | 1. All dimensions use integer indices and have a domain with lower bound of 0. 39 | 40 | **Additional Requirements for Arrays** 41 | 42 | 1. All arrays in the group are named and have a single attribute. 43 | 44 | **Additional Requirements for Metadata** 45 | 46 | 1. There is only group and attribute level metadata. 47 | 48 | ## Specification Q&A 49 | 50 | 1. Why have a special specification for the TileDB-CF library? 51 | 52 | The TileDB data model is very general and can be used to support a wide-range of applications. However, there is always a push-and-pull between how general your data model is and enabling specific behavior or interpretations for the data. 
The purpose of the TileDB-CF specification is to handle the case where we have multiple TileDB arrays defined on the same underlying dimensions. By creating a specification we make our assumptions explicit and let users know exactly what they must do to use this tool. 53 | 54 | 55 | 2. Is the specification backwards compatible? 56 | 57 | Not yet. This library and data model are still under initial development. When the data model has stabilized we will release a 1.0.0 version. 58 | 59 | 3. Why is there both a library version and a specification version? 60 | 61 | The TileDB-CF Python package will update much more frequently than the specification. The specification is more-or-less just a summary of the conventions the TileDB-CF library is using. As such, a change to the specification version will always coincide with a change to the library version, but the library version can update without affecting the specification. 62 | 63 | 4. What version is my current data? 64 | 65 | The TileDB-CF dataspace specification is fairly minimal. Your data may satisfy multiple versions. Currently, we do not provide support for checking that your data satisfies the TileDB-CF dataspace convention, but some such tooling will be implemented before the 1.0.0 release of this specification. 66 | 67 | 68 | ## Changelog 69 | 70 | ### Version 0.3.0 71 | 72 | * Terminology 73 | 74 | - Remove notion of a dataspace name. 75 | 76 | * CF Dataspace 77 | 78 | - Remove requirement that all attributes and dimensions are named (allow anonymous attributes and dimensions). 79 | - Remove group metadata array. Group metadata is now directly supported in the TileDB core engine. 80 | - Remove the notion of a dataspace name and the associated requirements. 81 | - Remove requirement that attributes have unique dataspace names for general CF Dataspace. 82 | - Remove requirement of `_FillValue`. 83 | - Add dimension-level metadata. 
84 | 85 | * Simple CF Dataspace 86 | 87 | - Remove requiment all collections of dimension have a unique dataspace name. 88 | - Add requirement all arrays are uniquely named and have a single attribute. 89 | - Add restriction that metadata only exists for attributes and groups. 90 | 91 | ### Version 0.2.0 92 | 93 | - Major revision. See appendix for full specification. 94 | 95 | ### Version 0.1.0 96 | 97 | - Initial release. See appendix for full specification. 98 | 99 | 100 | ## Appendix 101 | 102 | ### TileDB-CF Dataspace 0.1.0 103 | 104 | #### Terminology 105 | 106 | * **Index dimension**: A TileDB dimension with an integer data type and domain with `0` as its lower bound. 107 | * **Data dimension**: Any TileDB dimension that is not an index dimension. 108 | * **Dataspace name**: The name of an attribute or dimension stripped of an optional suffix of `.index` or `.data`. 109 | 110 | #### CF Dataspace 111 | 112 | **Requirements for Attributes and Dimensions** 113 | 114 | 1. All attributes and dimension must be named (there must not be any anonymous attributes or dimensions). 115 | 2. All dimensions that share a name must have the same domain and data type. 116 | 3. All attributes must have a unique dataspace name. 117 | 4. If an attribute and data dimension share the same dataspace name, they must share the same full name and data type. 118 | 119 | **Requirements for Metadata** 120 | 121 | 1. Group metadata is stored in a special metadata array named `__tiledb_group` inside the TileDB group. 122 | 2. Attribute metadata is stored in the array the attribute is in using the prefix `__tiledb_attr.{attr_name}.` for the attribute key where `{attr_name}` is the full name of the attribute. 123 | 3. If the metadata key `_FillValue` exists for an attribute; it must have the same value as the fill value for the attribute. 
124 | 125 | #### Indexable CF Dataspace 126 | 127 | A CF Dataspace is said to be indexable if it satisfies all requirements of a CF Dataspace along with the following condition: 128 | 129 | * All data dimensions must have an axis label that maps an index dimension with the same dataspace name as the data dimension to an attribute with the same full name and data type as the data dimension. 130 | 131 | 132 | ### TileDB-CF Dataspace 0.2.0 133 | 134 | A TileDB CF dataspace is a TileDB group with arrays, attributes, and dimensions that satisfy the following rules. 135 | 136 | #### Terminology 137 | 138 | * **Dataspace name**: The name of an attribute or dimension stripped of an optional suffix of `.index` or `.data`. 139 | * **Collection of dimensions**: A set of TileDB dimensions with the same name, data type, and domain. 140 | 141 | #### CF Dataspace 142 | 143 | A CF Dataspace is a TileDB group that follows certain requirements in order to provide additional relational context to dimensions and attributes using naming conventions. In a CF Dataspace, TileDB attributes within the entire group are unique and TileDB dimensions that share the same name are considered the same object. 144 | 145 | **Requirements for Attributes and Dimensions** 146 | 147 | 1. All attributes and dimension must be named (there must not be any anonymous attributes or dimensions). 148 | 2. All dimensions that share a name must belong to the same collection (they must have the same domain and data type). 149 | 3. All attributes must have a unique dataspace name. 150 | 151 | **Requirements for Metadata** 152 | 153 | 1. Group metadata is stored in a special metadata array named `__tiledb_group` inside the TileDB group. 154 | 2. Attribute metadata is stored in the same array the attribute is stored in. The metadata key must use the prefix `__tiledb_attr.{attr_name}.` where `{attr_name}` is the full name of the attribute. 155 | 3. 
If the metadata key `_FillValue` exists for an attribute; it must have the same value as the fill value for the attribute. 156 | 157 | ### Simple CF Dataspace 158 | 159 | A simple CF dataspace is a direct implementation of the NetCDF data model in TileDB. It follows the same rules as a CF dataspace along with the following requirements: 160 | 161 | **Additional Requirements for Dimensions** 162 | 163 | 1. All dimensions use integer indices and have a domain with lower bound of 0. 164 | 2. All collections of dimensions must have a unique dataspace name. 165 | -------------------------------------------------------------------------------- /tiledb/cf/xarray_engine/_encoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import tiledb 4 | 5 | # Generic xarray encoding 6 | _UNLIMITED_DIMS_ENCODING = "unlimited_dims" 7 | _FILL_VALUE_ENCODING = "_FillValue" 8 | 9 | # TileDB specific variable encoding 10 | _ATTR_NAME_ENCODING = "attr_name" 11 | _ATTR_FILTERS_ENCODING = "filters" 12 | _TILE_SIZES_ENCODING = "tiles" 13 | _MAX_SHAPE_ENCODING = "max_shape" 14 | _DIM_DTYPE_ENCODING = "dim_dtype" 15 | 16 | 17 | class TileDBVariableEncoder: 18 | """Class for encoding array variables. 19 | 20 | Parameters 21 | ---------- 22 | name 23 | Name of the variable. 24 | variable 25 | Xarray variable to encode. 26 | encoding 27 | Dictionary of TileDB encoding keywords. 28 | unlimited_dims 29 | Unlimited dimensions. Only used if max_shape is not provided in the encoding. 30 | ctx 31 | Context object for TileDB operations. 32 | """ 33 | 34 | valid_encoding_keys = { 35 | _ATTR_FILTERS_ENCODING, 36 | _ATTR_NAME_ENCODING, 37 | _DIM_DTYPE_ENCODING, 38 | _MAX_SHAPE_ENCODING, 39 | _TILE_SIZES_ENCODING, 40 | } 41 | 42 | def __init__(self, name, variable, encoding, unlimited_dims, ctx): 43 | # Set initial class properties. 
44 | self._ctx = ctx 45 | self._name = name 46 | self._variable = variable 47 | self._encoding = dict() 48 | 49 | # Check the input encoding data is valid. 50 | for key in encoding: 51 | if key not in self.valid_encoding_keys: 52 | raise KeyError( 53 | "Encoding error on variable '{self._name}'. Invalid encoding key " 54 | f"``{key}``." 55 | ) 56 | 57 | # Initialize encoding values. 58 | try: 59 | # Set attribute encodings: attr_name and attr_filters. 60 | self.attr_name = encoding.get( 61 | _ATTR_NAME_ENCODING, 62 | f"{self._name}_" if self._name in variable.dims else self._name, 63 | ) 64 | self.filters = encoding.get( 65 | _ATTR_FILTERS_ENCODING, 66 | tiledb.FilterList( 67 | (tiledb.ZstdFilter(level=5, ctx=self._ctx),), ctx=self._ctx 68 | ), 69 | ) 70 | 71 | # Set domain encodings: dim_dtype, tiles, and max_shape. 72 | self.dim_dtype = encoding.get(_DIM_DTYPE_ENCODING, np.dtype(np.uint32)) 73 | if _MAX_SHAPE_ENCODING in encoding: 74 | self.max_shape = encoding.get(_MAX_SHAPE_ENCODING) 75 | else: 76 | # Set unlimited dimensions to max possible size for datatype and 77 | # remaining dimensions to the size of the variable dimension. 78 | unlimited_dims = { 79 | dim_name for dim_name in variable.dims if dim_name in unlimited_dims 80 | } 81 | unlim = np.iinfo(self.dim_dtype).max 82 | self.max_shape = tuple( 83 | unlim if dim_name in unlimited_dims else dim_size 84 | for dim_name, dim_size in zip(variable.dims, variable.shape) 85 | ) 86 | self.tiles = encoding.get(_TILE_SIZES_ENCODING, None) 87 | except ValueError as err: 88 | raise ValueError(f"Encoding error for variable '{self._name}'.") from err 89 | 90 | @property 91 | def attr_name(self): 92 | return self._encoding.get(_ATTR_NAME_ENCODING, self._name) 93 | 94 | @attr_name.setter 95 | def attr_name(self, name): 96 | if name in self._variable.dims: 97 | raise ValueError( 98 | f"Attribute name '{name}' is already a dimension name. Attribute names " 99 | f"must be unique." 
100 | ) 101 | self._encoding[_ATTR_NAME_ENCODING] = name 102 | 103 | @property 104 | def dim_dtype(self): 105 | return self._encoding[_DIM_DTYPE_ENCODING] 106 | 107 | @dim_dtype.setter 108 | def dim_dtype(self, dim_dtype): 109 | if dim_dtype.kind not in ("i", "u"): 110 | raise ValueError( 111 | f"Dimension dtype ``{dim_dtype}`` is not a valid signed or unsigned " 112 | f"integer dtype." 113 | ) 114 | self._encoding[_DIM_DTYPE_ENCODING] = dim_dtype 115 | 116 | @property 117 | def dtype(self): 118 | return self._variable.dtype 119 | 120 | @property 121 | def fill(self): 122 | fill = self._variable.encoding.get(_FILL_VALUE_ENCODING, None) 123 | if fill is np.nan: 124 | return None 125 | return fill 126 | 127 | @property 128 | def filters(self): 129 | return self._encoding[_ATTR_FILTERS_ENCODING] 130 | 131 | @filters.setter 132 | def filters(self, filters): 133 | self._encoding[_ATTR_FILTERS_ENCODING] = filters 134 | 135 | def create_array_schema(self): 136 | """Returns a TileDB attribute from the provided variable and encodings.""" 137 | attr = tiledb.Attr( 138 | name=self.attr_name, 139 | dtype=self.dtype, 140 | fill=self.fill, 141 | filters=self.filters, 142 | ctx=self._ctx, 143 | ) 144 | tiles = self.tiles 145 | max_shape = self.max_shape 146 | dims = tuple( 147 | tiledb.Dim( 148 | name=dim_name, 149 | dtype=self.dim_dtype, 150 | domain=(0, max_shape[index] - 1), 151 | tile=None if tiles is None else tiles[index], 152 | ctx=self._ctx, 153 | ) 154 | for index, dim_name in enumerate(self._variable.dims) 155 | ) 156 | return tiledb.ArraySchema( 157 | domain=tiledb.Domain(*dims, ctx=self._ctx), 158 | attrs=(attr,), 159 | ctx=self._ctx, 160 | ) 161 | 162 | def get_encoding_metadata(self): 163 | meta = dict() 164 | return meta 165 | 166 | @property 167 | def max_shape(self): 168 | return self._encoding[_MAX_SHAPE_ENCODING] 169 | 170 | @max_shape.setter 171 | def max_shape(self, max_shape): 172 | if len(max_shape) != self._variable.ndim: 173 | raise ValueError( 174 | 
f"Incompatible shape {max_shape} for variable with " 175 | f"{self._variable.ndim} dimensions." 176 | ) 177 | if any( 178 | dim_size < var_size 179 | for dim_size, var_size in zip(max_shape, self._variable.shape) 180 | ): 181 | raise ValueError( 182 | f"Incompatible max shape {max_shape} for variable with shape " 183 | f"{self._variable.shape}. Max shape must be greater than or equal " 184 | f"to the variable shape for all dimensions." 185 | ) 186 | if ( 187 | _TILE_SIZES_ENCODING in self._encoding 188 | and self.tiles is not None 189 | and any( 190 | dim_size < tile_size 191 | for tile_size, dim_size in zip(self.tiles, max_shape) 192 | ) 193 | ): 194 | raise ValueError( 195 | f"Incompatible max shape {max_shape} provied for a variable with tiles " 196 | f"{self.tiles}. Each tile must be less than or equal to the " 197 | f"max size of the dimension it is on." 198 | ) 199 | 200 | self._encoding[_MAX_SHAPE_ENCODING] = max_shape 201 | 202 | @property 203 | def encoding(self): 204 | return self._encoding 205 | 206 | @property 207 | def tiles(self): 208 | return self._encoding[_TILE_SIZES_ENCODING] 209 | 210 | @tiles.setter 211 | def tiles(self, tiles): 212 | if tiles is not None: 213 | if len(tiles) != self._variable.ndim: 214 | raise ValueError( 215 | f"Incompatible number of tiles given. {len(tiles)} tiles provided " 216 | f"for a variable with {self._variable.ndim} dimensions. There must " 217 | f"be exactly one tile per dimension." 218 | ) 219 | if _MAX_SHAPE_ENCODING in self._encoding and any( 220 | dim_size < tile_size 221 | for tile_size, dim_size in zip(tiles, self.max_shape) 222 | ): 223 | raise ValueError( 224 | f"Incompatible tiles {tiles} provied for a variable with max shape " 225 | f"{self.max_shape}. Each tile must be less than or equal to the " 226 | f"max size of the dimension it is on." 
227 | ) 228 | self._encoding[_TILE_SIZES_ENCODING] = tiles 229 | 230 | @property 231 | def variable_name(self): 232 | return self._name 233 | -------------------------------------------------------------------------------- /tiledb/cf/xarray_engine/_array_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from xarray.backends.common import BackendArray 3 | 4 | import tiledb 5 | 6 | from .._utils import safe_set_metadata 7 | from ._common import _ATTR_PREFIX 8 | 9 | 10 | def _to_zero_based_tiledb_index(dim_name, dim_size, index): 11 | """Converts an xarray integer, array, or slice to an index object usable by the 12 | TileDB multi_index function. Only for dimensions with integer domains that start 13 | at zero. 14 | 15 | The following is assumed about xarray indices: 16 | * An index may be an integer, a slice, or a Numpy array of integer indices. 17 | * An integer index or component of an array is such that -size <= value < size. 18 | * Non-negative values are a standard zero-based index. 19 | * Negative values count backwards from the end of the array with the last value 20 | of the array starting at -1. 21 | 22 | Parameters 23 | ---------- 24 | dim_name 25 | Name of the dimension. Used for errors. 26 | dim_size 27 | Size of the dimension as interpreted by xarray. May be smaller than the 28 | full domain of the TileDB dimension. 29 | index 30 | An integer index, array of integer indices, or a slice for indexing an 31 | xarray dimension. 32 | 33 | Returns 34 | ------- 35 | Union[int, List[int], slice] 36 | An integer, a list of integer values, or a slice for indexing a 37 | TileDB dimension using mulit_index. 38 | """ 39 | if np.isscalar(index): 40 | # Convert xarray index to TileDB dimension coordinate 41 | if not -dim_size <= index < dim_size: 42 | raise IndexError( 43 | f"Index {index} out of bounds for dimension '{dim_name}' with size " 44 | f"{dim_size}." 
45 | ) 46 | return index if index >= 0 else index + dim_size - 1 47 | 48 | if isinstance(index, slice): 49 | # Using range handles negative numbers and `None` values. 50 | index = range(dim_size)[index] 51 | if index.step in (1, None): 52 | # Convert from index slice to coordinate slice (note that xarray 53 | # includes the starting point and excludes the ending point vs. TileDB 54 | # multi_index which includes both the staring point and ending point). 55 | return slice(index.start, index.stop - 1) 56 | # This can be replaced with a proper slice when TileDB supports steps. 57 | return list(np.arange(index.start, index.stop, index.step)) 58 | 59 | if isinstance(index, np.ndarray): 60 | # Check numpy array has valid data. 61 | if index.ndim != 1: 62 | raise TypeError( 63 | f"Invalid indexer array for dimension '{dim_name}'. Input array index " 64 | f"must have exactly 1 dimension." 65 | ) 66 | if not ((-dim_size <= index).all() and (index < dim_size).all()): 67 | raise IndexError( 68 | f"Index {index} out of bounds for dimension '{dim_name}' with size " 69 | f"{dim_size}." 70 | ) 71 | # Convert negative indices to positive indices and return as a list of 72 | # values. 73 | return list(index + np.where(index >= 0, 0, dim_size - 1)) 74 | raise TypeError( 75 | f"Unexpected indexer type {type(index)} for dimension '{dim_name}'." 76 | ) 77 | 78 | 79 | class TileDBArrayWrapper(BackendArray): 80 | """Wrapper that allows xarray to access a TileDB array.""" 81 | 82 | __slots__ = ( 83 | "dtype", 84 | "shape", 85 | "variable_name", 86 | "_array_kwargs", 87 | "_attr_name", 88 | "_dim_names", 89 | "_fill", 90 | "_index_converters", 91 | ) 92 | 93 | def __init__( 94 | self, 95 | *, 96 | variable_name, 97 | uri, 98 | schema, 99 | attr_key, 100 | config, 101 | ctx, 102 | timestamp, 103 | fixed_dimensions, 104 | dimension_sizes, 105 | ): 106 | # Set basic properties. 
107 | self.variable_name = variable_name 108 | self._array_kwargs = { 109 | "uri": uri, 110 | "config": config, 111 | "ctx": ctx, 112 | "timestamp": timestamp, 113 | } 114 | self._dim_names = tuple(dim.name for dim in schema.domain) 115 | 116 | # Check the array. 117 | if schema.sparse: 118 | raise ValueError( 119 | f"Error for variable '{self.variable_name}'; sparse arrays are not " 120 | f"supported." 121 | ) 122 | 123 | # Check dimensions and get the array shape. 124 | for dim in schema.domain: 125 | if dim.domain[0] != 0: 126 | raise ValueError( 127 | f"Error for variable '{self.variable_name}'; dimension " 128 | f"'{dim.name}' does not have a domain with lower bound of 0." 129 | ) 130 | if dim.dtype.kind not in ("i", "u"): 131 | raise ValueError( 132 | f"Error for variable '{self.variable_name}'. Dimension " 133 | f"'{dim.name}' has unsupported dtype={dim.dtype}." 134 | ) 135 | 136 | # Set TileDB attribute properties. 137 | _attr = schema.attr(attr_key) 138 | self._attr_name = _attr.name 139 | self.dtype = _attr.dtype 140 | self._fill = _attr.fill 141 | 142 | # Get the shape. 143 | if dimension_sizes is None: 144 | self.shape = schema.shape 145 | else: 146 | self.shape = tuple( 147 | int(dim.domain[1]) + 1 148 | if dim.name in fixed_dimensions 149 | else dimension_sizes.get(dim.name, int(dim.domain[1]) + 1) 150 | for dim in schema.domain 151 | ) 152 | 153 | def __getitem__(self, key): 154 | # Check the length of the input. 155 | indices = key.tuple 156 | if len(indices) != len(self.shape): 157 | ndim = len(self.shape) 158 | raise ValueError( 159 | f"key of length {len(indices)} cannot be used for a TileDB array" 160 | f" with {ndim} {'dimension' if ndim == 1 else 'dimensions'}" 161 | ) 162 | 163 | # Compute the shape of the output, collapsing any dimensions with scalar input. 164 | # If a dimension is of length zero, return an appropriately shaped enpty array. 
165 | shape = tuple( 166 | len(range(dim_size)[index] if isinstance(index, slice) else index) 167 | for dim_size, index in zip(self.shape, indices) 168 | if not np.isscalar(index) 169 | ) 170 | if 0 in shape: 171 | return np.zeros(shape) 172 | 173 | # Get data from the TileDB array. 174 | tiledb_indices = tuple( 175 | _to_zero_based_tiledb_index(self._dim_names[idim], dim_size, index) 176 | for idim, (dim_size, index) in enumerate(zip(self.shape, indices)) 177 | ) 178 | with tiledb.open(**self._array_kwargs, attr=self._attr_name) as array: 179 | result = array.multi_index[tiledb_indices][self._attr_name] 180 | 181 | # TileDB multi_index returns the same number of dimensions as the initial array. 182 | # To match the expected xarray output, we need to reshape the result to remove 183 | # any dimensions corresponding to scalar-valued input. 184 | return result.reshape(shape) 185 | 186 | def __setitem__(self, key, value): 187 | with tiledb.open(**self._array_kwargs, mode="w") as array: 188 | array[key] = value.astype(dtype=self.dtype) 189 | 190 | @property 191 | def dim_names(self): 192 | """A tuple of the dimension names.""" 193 | return self._dim_names 194 | 195 | def get_metadata(self): 196 | """Returns a dictionary of the variable metadata including xarray specific 197 | encodings. 198 | """ 199 | full_key_prefix = f"{_ATTR_PREFIX}{self._attr_name}." 
200 | with tiledb.open(**self._array_kwargs) as array: 201 | variable_metadata = {"_FillValue": self._fill} 202 | for key in array.meta: 203 | if key.startswith(full_key_prefix) and not len(key) == len( 204 | full_key_prefix 205 | ): 206 | variable_metadata[key[len(full_key_prefix) :]] = array.meta[key] 207 | return variable_metadata 208 | 209 | def set_metadata(self, input_meta): 210 | key_prefix = f"{_ATTR_PREFIX}{self._attr_name}" 211 | with tiledb.open(**self._array_kwargs, mode="w") as array: 212 | for key, value in input_meta.items(): 213 | safe_set_metadata(array.meta, f"{key_prefix}.{key}", value) 214 | --------------------------------------------------------------------------------