├── .gitignore ├── conftest.py ├── CONTRIBUTING.md ├── setup.py ├── README.md ├── .github └── workflows │ └── tests.yml ├── LICENSE ├── xarray_tensorstore_test.py └── xarray_tensorstore.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | .DS_Store 3 | build 4 | dist 5 | docs/.ipynb_checkpoints 6 | docs/_build 7 | docs/_autosummary 8 | docs/*.zarr 9 | __pycache__ 10 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Configure FLAGS with default values for absltest.""" 15 | from absl import app 16 | 17 | try: 18 | app.run(lambda argv: None) 19 | except SystemExit: 20 | pass 21 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit https://cla.developers.google.com/ to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our community guidelines 22 | 23 | This project follows 24 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Setup Xarray-Tensorstore.""" 16 | import setuptools 17 | 18 | 19 | setuptools.setup( 20 | name='xarray-tensorstore', 21 | version='0.3.0', # keep in sync with xarray_tensorstore.py 22 | license='Apache-2.0', 23 | author='Google LLC', 24 | author_email='noreply@google.com', 25 | install_requires=['numpy', 'xarray', 'zarr', 'tensorstore',], 26 | extras_require={ 27 | 'tests': ['absl-py', 'pandas', 'pytest', 'dask'], 28 | }, 29 | urls={'source': 'https://github.com/google/xarray-tensorstore'}, 30 | py_modules=['xarray_tensorstore'], 31 | python_requires='>=3.10', 32 | ) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Xarray-TensorStore 2 | 3 | Xarray-TensorStore is a small library that allows opening Zarr arrays into 4 | Xarray via TensorStore, instead of the standard Zarr-Python library. In some 5 | cases, we've found it to be considerably faster. 6 | 7 | **Warning**: Xarray-TensorStore relies upon internal Xarray APIs that will 8 | likely change in 9 | [future versions of Xarray](https://github.com/pydata/xarray/issues/3981), 10 | precisely to accommodate these sorts of use-cases. Expect that the current 11 | version of Xarray-TensorStore will break at some point in the future and require 12 | updates for a new Xarray release. 13 | 14 | ## Installation 15 | 16 | Xarray-TensorStore is available on PyPI: 17 | ``` 18 | pip install xarray-tensorstore 19 | ``` 20 | 21 | ## Usage 22 | 23 | Open a Zarr file into an `xarray.Dataset` using `open_zarr()`, and then use 24 | `read()` to start reading data in the background: 25 | 26 | ```python 27 | import xarray_tensorstore 28 | 29 | ds = xarray_tensorstore.open_zarr(path) 30 | 31 | # As with xarray.open_zarr(), indexing & transposing is lazy 32 | example = ds.sel(time='2020-01-01').transpose('longitude', 'latitude', ...) 33 | 34 | # Optional: start reading data in all arrays asynchronously 35 | read_example = xarray_tensorstore.read(example) 36 | 37 | # Blocking conversion of the data into NumPy arrays. This happens sequentially, 38 | # one array at a time, unless you call read() first. 39 | numpy_example = read_example.compute() 40 | ``` 41 | 42 | Open a list of Zarr files and concatenate them along a single dimension using 43 | `open_concatenated_zarrs()`. The returned `xarray.Dataset` behaves exactly as above. 44 | This function requires the Dask package to be installed. 45 | 46 | ```python 47 | import xarray_tensorstore 48 | 49 | ds = xarray_tensorstore.open_concatenated_zarrs( 50 | paths=[path1, path2], 51 | concat_dim="time", 52 | ) 53 | ``` 54 | 55 | ## Limitations 56 | 57 | - Xarray-TensorStore still uses Zarr-Python under the covers to open Zarr 58 | groups and read coordinate data (TensorStore does not yet support Zarr 59 | groups). 60 | - Unlike `xarray.open_zarr`, decoding of data arrays according to CF Conventions 61 | (e.g., `scale_factor` and `add_offset` attributes) is not supported (see the workaround sketch below).
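If a store's data variables do use CF-style encoding, `open_zarr` raises an error that suggests two workarounds. A minimal sketch of both, assuming `path` points at such a store:

```python
import xarray
import xarray_tensorstore

# Option 1: skip masking/scaling and work with the raw stored values.
raw = xarray_tensorstore.open_zarr(path, mask_and_scale=False)

# Option 2: fall back to xarray's own Zarr backend, which applies CF decoding.
decoded = xarray.open_zarr(path)
```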
62 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | # Triggers the workflow on push or pull request events but only for the main branch 5 | push: 6 | branches: [ main ] 7 | pull_request: 8 | branches: [ main ] 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | jobs: 13 | tests: 14 | name: "python=${{ matrix.python-version }} zarr=${{ matrix.zarr-version }} tests" 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.11", "3.12", "3.13"] 20 | zarr-version: [">=2,<3", ">=3"] 21 | steps: 22 | - name: Cancel previous 23 | uses: styfle/cancel-workflow-action@0.7.0 24 | with: 25 | access_token: ${{ github.token }} 26 | if: ${{github.ref != 'refs/heads/main'}} 27 | - uses: actions/checkout@v4 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | - name: Get pip cache dir 33 | id: pip-cache 34 | run: | 35 | python -m pip install --upgrade pip wheel 36 | echo "::set-output name=dir::$(pip cache dir)" 37 | - name: pip cache 38 | uses: actions/cache@v4 39 | with: 40 | path: ${{ steps.pip-cache.outputs.dir }} 41 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 42 | - name: Install Xarray-Tensorstore 43 | run: | 44 | pip install -e .[tests] "zarr${{ matrix.zarr-version }}" 45 | - name: Run unit tests 46 | run: | 47 | pytest . 48 | 49 | # Auto-publish when version is increased 50 | publish: 51 | # Only try to publish if: 52 | # * Repo is self (prevents running from forks) 53 | # * Branch is `main` 54 | if: | 55 | github.repository == 'google/xarray-tensorstore' 56 | && github.ref == 'refs/heads/main' 57 | needs: tests # Only publish after tests are successful 58 | runs-on: ubuntu-latest 59 | permissions: 60 | contents: write 61 | timeout-minutes: 30 62 | 63 | steps: 64 | # Publish the package (if local `__version__` > pip version) 65 | - uses: etils-actions/pypi-auto-publish@v1 66 | with: 67 | pypi-token: ${{ secrets.PYPI_API_TOKEN }} 68 | gh-token: ${{ secrets.GITHUB_TOKEN }} 69 | parse-changelog: false 70 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /xarray_tensorstore_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from absl.testing import absltest 15 | from absl.testing import parameterized 16 | import numpy as np 17 | import packaging 18 | import pandas as pd 19 | import pytest 20 | import tensorstore 21 | import xarray 22 | import xarray_tensorstore 23 | import zarr 24 | 25 | 26 | _USING_ZARR_PYTHON_3 = packaging.version.parse(zarr.__version__).major >= 3 27 | 28 | test_cases = [ 29 | { 30 | 'testcase_name': 'base', 31 | 'transform': lambda ds: ds, 32 | }, 33 | { 34 | 'testcase_name': 'transposed', 35 | 'transform': lambda ds: ds.transpose('z', 'x', 'y'), 36 | }, 37 | { 38 | 'testcase_name': 'basic_int', 39 | 'transform': lambda ds: ds.isel(y=1), 40 | }, 41 | { 42 | 'testcase_name': 'negative_int', 43 | 'transform': lambda ds: ds.isel(y=-1), 44 | }, 45 | { 46 | 'testcase_name': 'basic_slice', 47 | 'transform': lambda ds: ds.isel(z=slice(2)), 48 | }, 49 | { 50 | 'testcase_name': 'full_slice', 51 | 'transform': lambda ds: ds.isel(z=slice(0, 4)), 52 | }, 53 | { 54 | 'testcase_name': 'out_of_bounds_slice', 55 | 'transform': lambda ds: ds.isel(z=slice(0, 10)), 56 | }, 57 | { 58 | 'testcase_name': 'strided_slice', 59 | 'transform': lambda ds: ds.isel(z=slice(0, None, 2)), 60 | }, 61 | { 62 | 'testcase_name': 'negative_stride_slice', 63 | 'transform': lambda ds: ds.isel(z=slice(None, None, -1)), 64 | }, 65 | { 66 | 'testcase_name': 'repeated_indexing', 67 | 'transform': lambda ds: ds.isel(z=slice(1, None)).isel(z=0), 68 | }, 69 | { 70 | 'testcase_name': 'oindex', 71 | # includes repeated, negative and out of order indices 72 | 'transform': lambda ds: ds.isel(x=[0], y=[1, 1], z=[1, -1, 0]), 73 | }, 74 | { 75 | 'testcase_name': 'vindex', 76 | 'transform': lambda ds: ds.isel(x=('w', [0, 1]), y=('w', [1, 2])), 77 | }, 78 | { 79 | 'testcase_name': 'mixed_indexing_types', 80 | 'transform': lambda ds: ds.isel(x=0, y=slice(2), z=[-1]), 81 | }, 82 | { 83 | 'testcase_name': 'select_a_variable', 84 | 'transform': lambda ds: ds['foo'], 85 | }, 86 | ] 87 | 88 | 89 | class XarrayTensorstoreTest(parameterized.TestCase): 90 | 91 | # TODO(shoyer): consider using hypothesis to convert these into 92 | # property-based tests 93 | @parameterized.named_parameters(test_cases) 94 | def test_open_zarr(self, transform): 95 | source = xarray.Dataset( 96 | { 97 | 'foo': (('x',), np.arange(2), {'local': 'local metadata'}), 98 | 'bar': (('x', 'y'), np.arange(6).reshape(2, 3)), 99 | 'baz': (('x', 'y', 'z'), np.arange(24).reshape(2, 3, 4)), 100 | }, 101 | coords={ 102 | 'x': [1, 2], 103 | 'y': pd.to_datetime(['2000-01-01', '2000-01-02', '2000-01-03']), 104 | 'z': ['a', 'b', 'c', 'd'], 105 | }, 106 | attrs={'global': 'global metadata'}, 107 | ) 108 | path = self.create_tempdir().full_path 109 | source.chunk().to_zarr(path) 
110 | 111 | expected = transform(source) 112 | actual = transform(xarray_tensorstore.open_zarr(path)).compute() 113 | xarray.testing.assert_identical(actual, expected) 114 | 115 | @parameterized.named_parameters(test_cases) 116 | def test_open_concatenated_zarrs(self, transform): 117 | sources = [ 118 | xarray.Dataset( 119 | { 120 | 'foo': (('x',), x, {'local': 'local metadata'}), 121 | 'bar': (('x', 'y'), np.arange(6).reshape(2, 3)), 122 | 'baz': (('x', 'y', 'z'), np.arange(24).reshape(2, 3, 4)), 123 | }, 124 | coords={ 125 | 'x': [1, 2], 126 | 'y': pd.to_datetime(['2000-01-01', '2000-01-02', '2000-01-03']), 127 | 'z': ['a', 'b', 'c', 'd'], 128 | }, 129 | attrs={'global': 'global metadata'}, 130 | ) 131 | for x in [range(0, 2), range(3, 5)] 132 | ] 133 | 134 | zarr_dir = self.create_tempdir().full_path 135 | paths = [f'{zarr_dir}/{i}' for i in range(len(sources))] 136 | for source, path in zip(sources, paths, strict=True): 137 | source.chunk().to_zarr(path) 138 | 139 | expected = transform(xarray.concat(sources, dim='x')) 140 | actual = transform( 141 | xarray_tensorstore.open_concatenated_zarrs(paths, concat_dim='x') 142 | ).compute() 143 | xarray.testing.assert_identical(actual, expected) 144 | 145 | @parameterized.parameters( 146 | {'deep': True}, 147 | {'deep': False}, 148 | ) 149 | def test_copy(self, deep): 150 | source = xarray.Dataset({'foo': (('x',), np.arange(10))}) 151 | path = self.create_tempdir().full_path 152 | source.to_zarr(path) 153 | opened = xarray_tensorstore.open_zarr(path) 154 | copied = opened.copy(deep=deep) 155 | xarray.testing.assert_identical(copied, source) 156 | 157 | def test_sortby(self): 158 | # regression test for https://github.com/google/xarray-tensorstore/issues/1 159 | x = np.arange(10) 160 | source = xarray.Dataset({'foo': (('x',), x)}, {'x': x[::-1]}) 161 | path = self.create_tempdir().full_path 162 | source.to_zarr(path) 163 | opened = xarray_tensorstore.open_zarr(path) 164 | opened.sortby('x') # should not crash 165 | 166 | def test_compute(self): 167 | # verify that get_duck_array() is working properly 168 | source = xarray.Dataset({'foo': (('x',), np.arange(10))}) 169 | path = self.create_tempdir().full_path 170 | source.to_zarr(path) 171 | opened = xarray_tensorstore.open_zarr(path) 172 | computed = opened.compute() 173 | computed_data = computed['foo'].variable._data 174 | self.assertNotIsInstance(computed_data, tensorstore.TensorStore) 175 | 176 | def test_open_zarr_from_uri(self): 177 | source = xarray.Dataset( 178 | {'baz': (('x', 'y', 'z'), np.arange(24).reshape(2, 3, 4))} 179 | ) 180 | path = self.create_tempdir().full_path 181 | source.chunk().to_zarr(path) 182 | 183 | opened = xarray_tensorstore.open_zarr('file://' + path) 184 | xarray.testing.assert_identical(source, opened) 185 | 186 | @parameterized.parameters( 187 | {'zarr_format': 2, 'consolidated': True}, 188 | {'zarr_format': 3, 'consolidated': True}, 189 | {'zarr_format': 2, 'consolidated': False}, 190 | {'zarr_format': 3, 'consolidated': False}, 191 | ) 192 | def test_read_dataset(self, zarr_format: int, consolidated: bool): 193 | if not _USING_ZARR_PYTHON_3 and zarr_format == 3: 194 | self.skipTest('zarr format 3 is not supported in zarr < 3.0.0') 195 | source = xarray.Dataset( 196 | {'baz': (('x', 'y', 'z'), np.arange(24).reshape(2, 3, 4))}, 197 | coords={'x': np.arange(2)}, 198 | ) 199 | path = self.create_tempdir().full_path 200 | source.chunk().to_zarr( 201 | path, zarr_format=zarr_format, consolidated=consolidated 202 | ) 203 | 204 | opened = 
xarray_tensorstore.open_zarr(path) 205 | read = xarray_tensorstore.read(opened) 206 | 207 | self.assertIsNone(opened.variables['baz']._data.future) 208 | self.assertIsNotNone(read.variables['baz']._data.future) 209 | xarray.testing.assert_identical(read, source) 210 | 211 | @parameterized.parameters( 212 | {'zarr_format': 2}, 213 | {'zarr_format': 3}, 214 | ) 215 | def test_read_dataarray(self, zarr_format: int): 216 | if not _USING_ZARR_PYTHON_3 and zarr_format == 3: 217 | self.skipTest('zarr format 3 is not supported in zarr < 3.0.0') 218 | source = xarray.DataArray( 219 | np.arange(24).reshape(2, 3, 4), 220 | dims=('x', 'y', 'z'), 221 | name='baz', 222 | coords={'x': np.arange(2)}, 223 | ) 224 | path = self.create_tempdir().full_path 225 | source.to_dataset().chunk().to_zarr(path, zarr_format=zarr_format) 226 | 227 | opened = xarray_tensorstore.open_zarr(path)['baz'] 228 | read = xarray_tensorstore.read(opened) 229 | 230 | self.assertIsNone(opened.variable._data.future) 231 | self.assertIsNotNone(read.variable._data.future) 232 | xarray.testing.assert_identical(read, source) 233 | 234 | def test_mask_and_scale(self): 235 | source = xarray.DataArray( 236 | np.arange(24).reshape(2, 3, 4), 237 | dims=('x', 'y', 'z'), 238 | name='baz', 239 | coords={'x': np.arange(2)}, 240 | ) 241 | 242 | # invalid fill-value 243 | source.encoding = {'_FillValue': -1} 244 | path = self.create_tempdir().full_path 245 | source.to_dataset().chunk().to_zarr(path) 246 | expected_msg = ( 247 | 'variable baz has non-NaN fill value, which is not supported by' 248 | ' xarray-tensorstore: -1. Consider re-opening with' 249 | ' xarray_tensorstore.open_zarr(..., mask_and_scale=False), or falling' 250 | ' back to use xarray.open_zarr().' 251 | ) 252 | with self.assertRaisesWithLiteralMatch(ValueError, expected_msg): 253 | xarray_tensorstore.open_zarr(path) 254 | 255 | actual = xarray_tensorstore.open_zarr(path, mask_and_scale=False)['baz'] 256 | xarray.testing.assert_equal(actual, source) # no values are masked 257 | 258 | # invalid scaling 259 | source.encoding = {'scale_factor': 10.0} 260 | path = self.create_tempdir().full_path 261 | source.to_dataset().chunk().to_zarr(path) 262 | expected_msg = 'variable baz uses scale/offset encoding' 263 | with self.assertRaisesRegex(ValueError, expected_msg): 264 | xarray_tensorstore.open_zarr(path) 265 | 266 | actual = xarray_tensorstore.open_zarr(path, mask_and_scale=False)['baz'] 267 | self.assertFalse(actual.equals(source)) # not scaled properly 268 | 269 | # valid offset (coordinate only) 270 | source.encoding = {} 271 | source.coords['x'].encoding = {'add_offset': -1} 272 | path = self.create_tempdir().full_path 273 | source.to_dataset().chunk().to_zarr(path) 274 | actual = xarray_tensorstore.open_zarr(path, mask_and_scale=True)['baz'] 275 | xarray.testing.assert_identical(actual, source) 276 | self.assertEqual(actual.coords['x'].encoding['add_offset'], -1) 277 | 278 | @parameterized.named_parameters( 279 | { 280 | 'testcase_name': 'basic_indexing', 281 | 'key': (slice(1, None), slice(None), slice(None)), 282 | 'value': np.full((1, 2, 3), -1), 283 | }, 284 | { 285 | 'testcase_name': 'outer_indexing', 286 | 'key': (np.array([0]), np.array([1]), slice(None)), 287 | 'value': np.full((1, 1, 3), -2), 288 | }, 289 | { 290 | 'testcase_name': 'vectorized_indexing', 291 | 'key': (np.array([0]), np.array([0, 1]), slice(None)), 292 | 'value': np.full((2, 3), -3), 293 | }, 294 | ) 295 | def test_setitem(self, key, value): 296 | source_data = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], 
[10, 11, 12]]]) 297 | source = xarray.DataArray( 298 | source_data, 299 | dims=('x', 'y', 'z'), 300 | name='baz', 301 | ) 302 | path = self.create_tempdir().full_path 303 | source.to_dataset().chunk().to_zarr(path) 304 | 305 | opened = xarray_tensorstore.open_zarr(path, write=True)['baz'] 306 | 307 | opened[key] = value 308 | read = xarray_tensorstore.read(opened) 309 | 310 | expected_data = source_data.copy() 311 | expected_data[key] = value 312 | expected = xarray.DataArray( 313 | expected_data, 314 | dims=('x', 'y', 'z'), 315 | name='baz', 316 | ) 317 | 318 | xarray.testing.assert_equal(read, expected) 319 | 320 | def test_setitem_readonly(self): 321 | source = xarray.DataArray( 322 | np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]), 323 | dims=('x', 'y', 'z'), 324 | name='baz', 325 | ) 326 | path = self.create_tempdir().full_path 327 | source.to_dataset().chunk().to_zarr(path) 328 | 329 | opened = xarray_tensorstore.open_zarr(path)['baz'] 330 | with pytest.raises(ValueError): 331 | opened[1:, ...] = np.full((1, 2, 3), -1) 332 | 333 | 334 | if __name__ == '__main__': 335 | absltest.main() 336 | -------------------------------------------------------------------------------- /xarray_tensorstore.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Utilities for loading TensorStore data into Xarray.""" 15 | from __future__ import annotations 16 | 17 | import dataclasses 18 | import math 19 | import os.path 20 | import re 21 | from typing import Optional, TypeVar 22 | 23 | import numpy as np 24 | import packaging 25 | import tensorstore 26 | import xarray 27 | from xarray.core import indexing 28 | import zarr 29 | 30 | 31 | __version__ = '0.3.0' # keep in sync with setup.py 32 | 33 | 34 | Index = TypeVar('Index', int, slice, np.ndarray, None) 35 | XarrayData = TypeVar('XarrayData', xarray.Dataset, xarray.DataArray) 36 | 37 | 38 | def _numpy_to_tensorstore_index(index: Index, size: int) -> Index: 39 | """Switch from NumPy to TensorStore indexing conventions.""" 40 | # https://google.github.io/tensorstore/python/indexing.html#differences-compared-to-numpy-indexing 41 | if index is None: 42 | return None 43 | elif isinstance(index, int): 44 | # Negative integers do not count from the end in TensorStore 45 | return index + size if index < 0 else index 46 | elif isinstance(index, slice): 47 | start = _numpy_to_tensorstore_index(index.start, size) 48 | stop = _numpy_to_tensorstore_index(index.stop, size) 49 | if stop is not None: 50 | # TensorStore does not allow out of bounds slicing 51 | stop = min(stop, size) 52 | return slice(start, stop, index.step) 53 | else: 54 | assert isinstance(index, np.ndarray) 55 | return np.where(index < 0, index + size, index) 56 | 57 | 58 | @dataclasses.dataclass(frozen=True) 59 | class _TensorStoreAdapter(indexing.ExplicitlyIndexed): 60 | """TensorStore array that can be wrapped by xarray.Variable. 61 | 62 | We use Xarray's semi-internal ExplicitlyIndexed API so that Xarray will not 63 | attempt to load our array into memory as a NumPy array. In the future, this 64 | should be supported by public Xarray APIs, as part of the refactor discussed 65 | in: https://github.com/pydata/xarray/issues/3981 66 | """ 67 | 68 | array: tensorstore.TensorStore 69 | future: Optional[tensorstore.Future] = None 70 | 71 | @property 72 | def shape(self) -> tuple[int, ...]: 73 | return self.array.shape 74 | 75 | @property 76 | def dtype(self) -> np.dtype: 77 | return self.array.dtype.numpy_dtype 78 | 79 | @property 80 | def ndim(self) -> int: 81 | return len(self.shape) 82 | 83 | @property 84 | def size(self) -> int: 85 | return math.prod(self.shape) 86 | 87 | def __getitem__(self, key: indexing.ExplicitIndexer) -> _TensorStoreAdapter: 88 | index_tuple = tuple(map(_numpy_to_tensorstore_index, key.tuple, self.shape)) 89 | if isinstance(key, indexing.OuterIndexer): 90 | # TODO(shoyer): fix this for newer versions of Xarray. 
91 | # We get the error message: 92 | # AttributeError: '_TensorStoreAdapter' object has no attribute 'oindex' 93 | indexed = self.array.oindex[index_tuple] 94 | elif isinstance(key, indexing.VectorizedIndexer): 95 | indexed = self.array.vindex[index_tuple] 96 | else: 97 | assert isinstance(key, indexing.BasicIndexer) 98 | indexed = self.array[index_tuple] 99 | # Translate to the origin so repeated indexing is relative to the new bounds 100 | # like NumPy, not absolute like TensorStore 101 | translated = indexed[tensorstore.d[:].translate_to[0]] 102 | return type(self)(translated) 103 | 104 | def __setitem__(self, key: indexing.ExplicitIndexer, value) -> None: 105 | index_tuple = tuple(map(_numpy_to_tensorstore_index, key.tuple, self.shape)) 106 | if isinstance(key, indexing.OuterIndexer): 107 | self.array.oindex[index_tuple] = value 108 | elif isinstance(key, indexing.VectorizedIndexer): 109 | self.array.vindex[index_tuple] = value 110 | else: 111 | assert isinstance(key, indexing.BasicIndexer) 112 | self.array[index_tuple] = value 113 | # Invalidate the future so that the next read will pick up the new value 114 | object.__setattr__(self, 'future', None) 115 | 116 | # xarray>2024.02.0 uses oindex and vindex properties, which are expected to 117 | # return objects whose __getitem__ method supports the appropriate form of 118 | # indexing. 119 | @property 120 | def oindex(self) -> _TensorStoreAdapter: 121 | return self 122 | 123 | @property 124 | def vindex(self) -> _TensorStoreAdapter: 125 | return self 126 | 127 | def transpose(self, order: tuple[int, ...]) -> _TensorStoreAdapter: 128 | transposed = self.array[tensorstore.d[order].transpose[:]] 129 | return type(self)(transposed) 130 | 131 | def read(self) -> _TensorStoreAdapter: 132 | future = self.array.read() 133 | return type(self)(self.array, future) 134 | 135 | def __array__(self, dtype: Optional[np.dtype] = None) -> np.ndarray: # type: ignore 136 | future = self.array.read() if self.future is None else self.future 137 | return np.asarray(future.result(), dtype=dtype) 138 | 139 | def get_duck_array(self): 140 | # special method for xarray to return an in-memory (computed) representation 141 | return np.asarray(self) 142 | 143 | # Work around the missing __copy__ and __deepcopy__ methods from TensorStore, 144 | # which are needed for Xarray: 145 | # https://github.com/google/tensorstore/issues/109 146 | # TensorStore objects are immutable, so there's no need to actually copy them. 
147 | 148 | def __copy__(self) -> _TensorStoreAdapter: 149 | return type(self)(self.array, self.future) 150 | 151 | def __deepcopy__(self, memo) -> _TensorStoreAdapter: 152 | return self.__copy__() 153 | 154 | 155 | def _read_tensorstore( 156 | array: indexing.ExplicitlyIndexed, 157 | ) -> indexing.ExplicitlyIndexed: 158 | """Starts async reading on a TensorStore array.""" 159 | return array.read() if isinstance(array, _TensorStoreAdapter) else array 160 | 161 | 162 | def read(xarraydata: XarrayData, /) -> XarrayData: 163 | """Starts async reads on all TensorStore arrays.""" 164 | # pylint: disable=protected-access 165 | if isinstance(xarraydata, xarray.Dataset): 166 | data = { 167 | name: _read_tensorstore(var.variable._data) 168 | for name, var in xarraydata.data_vars.items() 169 | } 170 | elif isinstance(xarraydata, xarray.DataArray): 171 | data = _read_tensorstore(xarraydata.variable._data) 172 | else: 173 | raise TypeError(f'argument is not a DataArray or Dataset: {xarraydata}') 174 | # pylint: enable=protected-access 175 | return xarraydata.copy(data=data) 176 | 177 | 178 | _DEFAULT_STORAGE_DRIVER = 'file' 179 | 180 | 181 | def _zarr_spec_from_path(path: str, zarr_format: int) -> ...: 182 | if re.match(r'\w+\://', path): # path is a URI 183 | kv_store = path 184 | else: 185 | kv_store = {'driver': _DEFAULT_STORAGE_DRIVER, 'path': path} 186 | return {'driver': f'zarr{zarr_format}', 'kvstore': kv_store} 187 | 188 | 189 | def _raise_if_mask_and_scale_used_for_data_vars(ds: xarray.Dataset): 190 | """Check a dataset for data variables that would need masking or scaling.""" 191 | advice = ( 192 | 'Consider re-opening with xarray_tensorstore.open_zarr(..., ' 193 | 'mask_and_scale=False), or falling back to use xarray.open_zarr().' 194 | ) 195 | for k in ds: 196 | encoding = ds[k].encoding 197 | for attr in ['_FillValue', 'missing_value']: 198 | fill_value = encoding.get(attr, np.nan) 199 | if fill_value == fill_value: # pylint: disable=comparison-with-itself 200 | raise ValueError( 201 | f'variable {k} has non-NaN fill value, which is not supported by' 202 | f' xarray-tensorstore: {fill_value}. {advice}' 203 | ) 204 | for attr in ['scale_factor', 'add_offset']: 205 | if attr in encoding: 206 | raise ValueError( 207 | f'variable {k} uses scale/offset encoding, which is not supported' 208 | f' by xarray-tensorstore: {encoding}. 
{advice}' 209 | ) 210 | 211 | 212 | def _get_zarr_format(path: str) -> int: 213 | """Returns the Zarr format of the given path.""" 214 | if packaging.version.parse(zarr.__version__).major >= 3: 215 | return zarr.open_group(path, mode='r').metadata.zarr_format 216 | else: 217 | return 2 218 | 219 | 220 | def _open_tensorstore_arrays( 221 | path: str, 222 | names: list[str], 223 | group: zarr.Group | None, 224 | zarr_format: int, 225 | write: bool, 226 | context: tensorstore.Context | None = None, 227 | ) -> dict[str, tensorstore.Future]: 228 | """Open all arrays in a Zarr group using TensorStore.""" 229 | specs = { 230 | k: _zarr_spec_from_path(os.path.join(path, k), zarr_format) for k in names 231 | } 232 | 233 | assume_metadata = False 234 | if packaging.version.parse(zarr.__version__).major >= 3 and group is not None: 235 | consolidated_metadata = group.metadata.consolidated_metadata 236 | if consolidated_metadata is not None: 237 | assume_metadata = True 238 | for name in names: 239 | metadata = consolidated_metadata.metadata[name].to_dict() 240 | metadata.pop('attributes', None) # not supported by TensorStore 241 | specs[name]['metadata'] = metadata 242 | 243 | array_futures = {} 244 | for k, spec in specs.items(): 245 | array_futures[k] = tensorstore.open( 246 | spec, 247 | read=True, 248 | write=write, 249 | open=True, 250 | context=context, 251 | assume_metadata=assume_metadata, 252 | ) 253 | return array_futures 254 | 255 | 256 | def open_zarr( 257 | path: str, 258 | *, 259 | context: tensorstore.Context | None = None, 260 | mask_and_scale: bool = True, 261 | write: bool = False, 262 | consolidated: bool | None = None, 263 | ) -> xarray.Dataset: 264 | """Open an xarray.Dataset from Zarr using TensorStore. 265 | 266 | For best performance, explicitly call `read()` to asynchronously load data 267 | in parallel. Otherwise, xarray's `.compute()` method will load each variable's 268 | data in sequence. 269 | 270 | Example usage: 271 | 272 | import xarray_tensorstore 273 | 274 | ds = xarray_tensorstore.open_zarr(path) 275 | 276 | # indexing & transposing is lazy 277 | example = ds.sel(time='2020-01-01').transpose('longitude', 'latitude', ...) 278 | 279 | # start reading data asynchronously 280 | read_example = xarray_tensorstore.read(example) 281 | 282 | # blocking conversion of the data into NumPy arrays 283 | numpy_example = read_example.compute() 284 | 285 | Args: 286 | path: path or URI to Zarr group to open. 287 | context: TensorStore configuration options to use when opening arrays. 288 | mask_and_scale: if True (default), attempt to apply masking and scaling like 289 | xarray.open_zarr(). This is only supported for coordinate variables and 290 | otherwise will raise an error. 291 | write: Allow write access. Defaults to False. 292 | consolidated: If True, read consolidated metadata. By default, an attempt to 293 | use consolidated metadata is made with a fallback to non-consolidated 294 | metadata, like in Xarray. 295 | 296 | Returns: 297 | Dataset with all data variables opened via TensorStore. 298 | """ 299 | # We use xarray.open_zarr (which uses Zarr Python internally) to open the 300 | # initial version of the dataset for a few reasons: 301 | # 1. TensorStore does not support Zarr groups or array attributes, which we 302 | # need to open in the xarray.Dataset. We use Zarr Python instead of 303 | # parsing the raw Zarr metadata files ourselves. 304 | # 2. TensorStore doesn't support non-standard Zarr dtypes like UTF-8 strings. 305 | # 3. 
Xarray's open_zarr machinery does some pre-processing (e.g., from numeric 306 | # to datetime64 dtypes) that we would otherwise need to invoke explicitly 307 | # via xarray.decode_cf(). 308 | # 309 | # Fortunately (2) and (3) are most commonly encountered on small coordinate 310 | # arrays, for which the performance advantages of TensorStore are irrelevant. 311 | 312 | if context is None: 313 | context = tensorstore.Context() 314 | 315 | # Open Xarray's backends.ZarrStore directly so we can get access to the 316 | # underlying Zarr group's consolidated metadata. 317 | store = xarray.backends.ZarrStore.open_group( 318 | path, consolidated=consolidated 319 | ) 320 | group = store.zarr_group 321 | ds = xarray.open_dataset( 322 | filename_or_obj='', # ignored in favor of store= 323 | chunks=None, # avoid using dask 324 | mask_and_scale=mask_and_scale, 325 | store=store, 326 | engine='zarr', 327 | ) 328 | 329 | if mask_and_scale: 330 | # Data variables get replaced below with _TensorStoreAdapter arrays, which 331 | # don't get masked or scaled. Raising an error avoids surprising users with 332 | # incorrect data values. 333 | _raise_if_mask_and_scale_used_for_data_vars(ds) 334 | 335 | zarr_format = _get_zarr_format(path) 336 | array_futures = _open_tensorstore_arrays( 337 | path, list(ds), group, zarr_format, write=write, context=context 338 | ) 339 | arrays = {k: v.result() for k, v in array_futures.items()} 340 | new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()} 341 | 342 | return ds.copy(data=new_data) 343 | 344 | 345 | def _tensorstore_open_concatenated_zarrs( 346 | paths: list[str], 347 | data_vars: list[str], 348 | concat_axes: list[int], 349 | context: tensorstore.Context, 350 | ) -> dict[str, tensorstore.TensorStore]: 351 | """Open multiple zarrs with TensorStore. 352 | 353 | Args: 354 | paths: List of paths to zarr stores. 355 | data_vars: List of data variable names to open. 356 | concat_axes: List of axes along which to concatenate the data variables. 357 | context: TensorStore context. 358 | 359 | Returns: 360 | Dictionary of data variable names to concatenated TensorStore arrays. 361 | """ 362 | # Open all arrays in all datasets using tensorstore 363 | arrays_list = [] 364 | for path in paths: 365 | zarr_format = _get_zarr_format(path) 366 | # TODO(shoyer): Figure out how to support opening concatenated Zarrs with 367 | # consolidated metadata. xarray.open_mfdataset() doesn't support opening 368 | # from an existing store, so we'd have to replicate that functionality for 369 | # figuring out the structure of the concatenated dataset. 370 | group = None 371 | array_futures = _open_tensorstore_arrays( 372 | path, data_vars, group, zarr_format, write=False, context=context 373 | ) 374 | arrays_list.append(array_futures) 375 | 376 | # Concatenate the tensorstore arrays 377 | arrays = {} 378 | for k, axis in zip(data_vars, concat_axes, strict=True): 379 | datasets = [array_futures[k].result() for array_futures in arrays_list] 380 | arrays[k] = tensorstore.concat(datasets, axis=axis) 381 | 382 | return arrays 383 | 384 | 385 | def open_concatenated_zarrs( 386 | paths: list[str], 387 | concat_dim: str, 388 | *, 389 | context: tensorstore.Context | None = None, 390 | mask_and_scale: bool = True, 391 | ) -> xarray.Dataset: 392 | """Open an xarray.Dataset whilst concatenating multiple Zarr using TensorStore. 393 | 394 | Notes: 395 | This function depends on the Dask package. 396 | 397 | Args: 398 | paths: List of paths to zarr stores. 
399 | concat_dim: Dimension along which to concatenate the data variables. 400 | context: TensorStore context. 401 | mask_and_scale: Whether to mask and scale the data. 402 | 403 | Returns: 404 | Concatenated Dataset with all data variables opened via TensorStore. 405 | """ 406 | if context is None: 407 | context = tensorstore.Context() 408 | 409 | ds = xarray.open_mfdataset( 410 | paths, 411 | concat_dim=concat_dim, 412 | combine='nested', 413 | mask_and_scale=mask_and_scale, 414 | engine='zarr', 415 | ) 416 | 417 | if mask_and_scale: 418 | # Data variables get replaced below with _TensorStoreAdapter arrays, which 419 | # don't get masked or scaled. Raising an error avoids surprising users with 420 | # incorrect data values. 421 | _raise_if_mask_and_scale_used_for_data_vars(ds) 422 | 423 | data_vars = list(ds.data_vars) 424 | concat_axes = [ds[v].dims.index(concat_dim) for v in data_vars] 425 | arrays = _tensorstore_open_concatenated_zarrs( 426 | paths, data_vars, concat_axes, context 427 | ) 428 | new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()} 429 | 430 | return ds.copy(data=new_data) 431 | --------------------------------------------------------------------------------
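For reference, a minimal usage sketch of the write support exercised by `test_setitem` above, assuming a Zarr store at `./example.zarr` (hypothetical path) containing a data variable named `'baz'`:

```python
import numpy as np
import xarray_tensorstore

# Open with write access enabled (stores are opened read-only by default).
opened = xarray_tensorstore.open_zarr('./example.zarr', write=True)['baz']

# Item assignment writes through to the underlying Zarr store.
opened[1:, ...] = np.full(opened[1:, ...].shape, -1)

# Reads issued afterwards observe the newly written values.
print(xarray_tensorstore.read(opened).compute())
```

Without `write=True`, the same assignment raises a `ValueError`, as covered by `test_setitem_readonly`.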