├── .github └── workflows │ ├── ci.yml │ ├── lint.yml │ └── publish.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── perf_tests ├── README.md ├── compute_air.py ├── compute_air.py-2024-02-26T22:57:59+07:00.svg ├── compute_air.py-2024-03-03T07:42:36+05:30.svg ├── compute_air.py-2024-03-03T07:50:18+05:30.svg ├── groupby_air.py ├── groupby_air.py-2024-02-15T13:01:22+07:00.svg ├── groupby_air.py-2024-02-15T15:03:53+07:00.svg ├── groupby_air.py-2024-02-17T18:42:14+07:00.svg ├── groupby_air.py-2024-02-26T23:04:34+07:00.svg ├── groupby_air.py-2024-03-03T07:43:22+05:30.svg ├── groupby_air.py-2024-03-03T07:50:06+05:30.svg ├── groupby_air_full.py ├── groupby_air_full.py-2024-02-26T23:06:38+07:00.svg ├── groupby_air_full.py-2024-03-03T07:44:17+05:30.svg ├── groupby_air_full.py-2024-03-03T07:49:53+05:30.svg ├── open_era5.py ├── open_era5.py-2024-02-18T17:21:44+07:00.svg ├── open_era5.py-2024-02-18T17:33:02+07:00.svg ├── open_era5.py-2024-02-19T17:52:51+07:00.svg ├── profile.sh ├── sanity.py ├── sanity.py-2024-02-15T09:08:28+07:00.svg ├── sanity.py-2024-02-15T09:15:07+07:00.svg ├── sanity.py-2024-02-15T12:51:37+07:00.svg ├── sanity.py-2024-02-15T15:04:48+07:00.svg ├── sanity.py-2024-02-17T18:42:08+07:00.svg ├── sanity.py-2024-02-26T23:00:00+07:00.svg ├── sanity.py-2024-03-03T07:45:47+05:30.svg └── sanity.py-2024-03-03T07:49:44+05:30.svg ├── pyproject.toml └── xarray_sql ├── __init__.py ├── core.py ├── df.py ├── df_integrationtest.py ├── df_test.py ├── sql.py └── sql_test.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # Taken from Xee and minimally modified: https://github.com/google/Xee/blob/main/.github/workflows/ci-build.yml 2 | # 3 | # Copyright 2023 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================== 17 | name: ci 18 | 19 | on: 20 | # Triggers the workflow on push or pull request events but only for the main branch 21 | push: 22 | branches: [ main ] 23 | pull_request: 24 | branches: [ main ] 25 | # Allows you to run this workflow manually from the Actions tab 26 | workflow_dispatch: 27 | 28 | jobs: 29 | build: 30 | name: "python ${{ matrix.python-version }} tests" 31 | runs-on: ubuntu-latest 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | python-version: [ 36 | "3.8", 37 | "3.9", 38 | "3.10", 39 | "3.11", 40 | "3.12", 41 | ] 42 | steps: 43 | - name: Cancel previous 44 | uses: styfle/cancel-workflow-action@0.7.0 45 | with: 46 | access_token: ${{ github.token }} 47 | if: ${{github.ref != 'refs/heads/main'}} 48 | - uses: actions/checkout@v2 49 | - name: Set up Python ${{ matrix.python-version }} 50 | uses: actions/setup-python@v4 51 | with: 52 | python-version: ${{ matrix.python-version }} 53 | cache: 'pip' 54 | - name: Install xarray_sql 55 | run: | 56 | pip install -e .[test] 57 | - uses: 'actions/checkout@v4' 58 | - name: Run unit tests 59 | run: | 60 | pytest xarray_sql -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | # Taken from Xee and minimally modified: https://github.com/google/Xee/blob/main/.github/workflows/lint.yml 2 | # 3 | # Copyright 2023 Google LLC 4 | # 5 | # Licensed 
under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================== 17 | name: lint 18 | 19 | on: 20 | # Triggers the workflow on push or pull request events but only for the main branch 21 | push: 22 | branches: [ main ] 23 | pull_request: 24 | branches: [ main ] 25 | # Allows you to run this workflow manually from the Actions tab 26 | workflow_dispatch: 27 | 28 | jobs: 29 | build: 30 | name: "python ${{ matrix.python-version }} lint" 31 | runs-on: ubuntu-latest 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 36 | steps: 37 | - name: Cancel previous 38 | uses: styfle/cancel-workflow-action@0.7.0 39 | with: 40 | access_token: ${{ github.token }} 41 | if: ${{github.ref != 'refs/heads/main'}} 42 | - uses: actions/checkout@v2 43 | - name: Set up Python ${{ matrix.python-version }} 44 | uses: actions/setup-python@v2 45 | with: 46 | python-version: ${{ matrix.python-version }} 47 | - name: Install linter 48 | run: | 49 | pip install pyink 50 | - name: Lint with pyink 51 | run: | 52 | pyink --check . 
-------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # Taken from Xee and minimally modified: https://github.com/google/Xee/blob/main/.github/workflows/publish.yml 2 | # 3 | # Copyright 2023 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # https://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | name: Publish to PyPi 17 | 18 | on: 19 | release: 20 | types: [published] 21 | 22 | jobs: 23 | build-artifacts: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Set up Python 28 | uses: actions/setup-python@v2.3.1 29 | with: 30 | python-version: 3.9 31 | 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | python -m pip install build setuptools setuptools-scm wheel twine check-manifest 36 | 37 | - name: Build tarball and wheels 38 | run: | 39 | git clean -xdf 40 | git restore -SW . 41 | python -m build --sdist --wheel . 
42 | - name: Check built artifacts 43 | run: | 44 | python -m twine check dist/* 45 | pwd 46 | - uses: actions/upload-artifact@v2 47 | with: 48 | name: releases 49 | path: dist 50 | 51 | test-built-dist: 52 | needs: build-artifacts 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/setup-python@v2.3.1 56 | name: Install Python 57 | with: 58 | python-version: 3.9 59 | - uses: actions/download-artifact@v2 60 | with: 61 | name: releases 62 | path: dist 63 | - name: List contents of built dist 64 | run: | 65 | ls -ltrh 66 | ls -ltrh dist 67 | - name: Publish package to TestPyPI 68 | if: github.event_name == 'push' 69 | uses: pypa/gh-action-pypi-publish@v1.4.2 70 | with: 71 | user: __token__ 72 | password: ${{ secrets.TESTPYPI_TOKEN }} 73 | repository_url: https://test.pypi.org/legacy/ 74 | verbose: true 75 | 76 | - name: Check uploaded package 77 | if: github.event_name == 'push' 78 | run: | 79 | sleep 3 80 | python -m pip install --upgrade pip 81 | python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade xarray-sql 82 | python -c "import xarray_sql; print(xarray_sql.__version__)" 83 | upload-to-pypi: 84 | needs: test-built-dist 85 | if: github.event_name == 'release' 86 | runs-on: ubuntu-latest 87 | steps: 88 | - uses: actions/download-artifact@v2 89 | with: 90 | name: releases 91 | path: dist 92 | - name: Publish package to PyPI 93 | uses: pypa/gh-action-pypi-publish@v1.4.2 94 | with: 95 | user: __token__ 96 | password: ${{ secrets.PYPI_TOKEN }} 97 | verbose: true -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | .DS_Store 3 | build 4 | dist 5 | __pycache__ 6 | .pytest_cache -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | ## Where to start? 
4 | 5 | Please check out the [issues tab](https://github.com/alxmrs/xarray-sql/issues). 6 | Let's have a discussion over there before proceeding with any changes. Great 7 | minds think alike -- someone may have already created an issue related to your 8 | inquiry. If there's a bug, please let us know. 9 | 10 | If you're totally new to open-source development, I recommend 11 | reading [Xarray's contributing guide](https://docs.xarray.dev/en/stable/contributing.html). 12 | 13 | ## Developer setup 14 | 15 | 0. (Recommended) Create a project-specific python 16 | environment. [(mini)Conda](https://docs.anaconda.com/free/miniconda/index.html) 17 | or [Mamba](https://mamba.readthedocs.io/en/latest/) 18 | is preferred. 19 | 1. Clone the repository (bonus: [via SSH](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/adding-a-new-ssh-key-to-your-github-account)) 20 | and `cd xarray-sql` (the project root). 21 | 1. Install dev dependencies via: `pip install -e ".[dev]"` 22 | 23 | 24 | ## Before submitting a pull request... 25 | 26 | Thanks so much for your contribution! For a volunteer-led project, we so 27 | appreciate your help. A few things to keep in mind: 28 | - Please be nice. We assume good intent from you, and we ask you to do the same for us. 29 | - Development in this project will be slow if not sporadic. Reviews will come 30 | as time allows. 31 | - Every contribution, big or small, matters and deserves credit. 32 | 33 | Here are a few requests for your development process: 34 | - We require all code to be formatted with `pyink`. 35 | - Please include unit tests, if possible, and performance tests when you touch the core functionality (see `perf_tests/`). 36 | - It's polite to do a self-review before asking for one from a maintainer. Don't stress if you forget; we all do sometimes. 37 | - Please add (or update) documentation when adding new code. We use Google Style docstrings. 
38 | - We are thrilled to get documentation-only PRs -- especially spelling and typo fixes (I am a bad speller). If writing tutorials excites you, it would be to everyone's benefit. 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xarray-sql 2 | 3 | _Query Xarray with SQL_ 4 | 5 | [![ci](https://github.com/alxmrs/xarray-sql/actions/workflows/ci.yml/badge.svg)](https://github.com/alxmrs/xarray-sql/actions/workflows/ci.yml) 6 | [![lint](https://github.com/alxmrs/xarray-sql/actions/workflows/lint.yml/badge.svg)](https://github.com/alxmrs/xarray-sql/actions/workflows/lint.yml) 7 | 8 | ```shell 9 | pip install xarray-sql 10 | ``` 11 | 12 | ## What is this? 13 | 14 | This is an experiment to provide a SQL interface for raster data. 15 | 16 | ```python 17 | import xarray as xr 18 | import xarray_sql as qr 19 | 20 | ds = xr.tutorial.open_dataset('air_temperature') 21 | 22 | # The same as a dask-sql Context; i.e. an Apache DataFusion Context. 
23 | c = qr.Context(ds) 24 | c.create_table('air', ds, chunks=dict(time=24)) 25 | 26 | df = c.sql(''' 27 | SELECT 28 | "lat", "lon", AVG("air") as air_total 29 | FROM 30 | "air" 31 | GROUP BY 32 | "lat", "lon" 33 | ''') 34 | 35 | # A table of the average temperature for each location across time. 36 | df.compute() 37 | 38 | # Alternatively, you can just create the DataFrame from the Dataset: 39 | df = qr.read_xarray(ds) 40 | df.head() 41 | ``` 42 | 43 | Succinctly, we "pivot" Xarray Datasets to treat them like tables so we can run 44 | SQL queries against them. 45 | 46 | ## Why build this? 47 | 48 | A few reasons: 49 | 50 | * Even though SQL is the lingua franca of data, scientific datasets are often 51 | inaccessible to non-scientists (SQL users). 52 | * Joining tabular data with raster data is common yet difficult. It could be 53 | easy. 54 | * There are many cloud-native, Xarray-openable datasets, 55 | from [Google Earth Engine](https://github.com/google/Xee) 56 | to [Pangeo Forge](https://pangeo-forge.org/). Wouldn’t it be great if these 57 | were also SQL-accessible? How can the bridge be built with minimal effort? 58 | 59 | This is a light-weight way to prove the value of the interface. 60 | 61 | The larger goal is to explore the hypothesis that [Pangeo](https://pangeo.io/) 62 | is a scientific database. Here, xarray-sql can be thought of as a missing DB 63 | front end. 64 | 65 | ## How does it work? 66 | 67 | All chunks in an Xarray Dataset are transformed into a Dask DataFrame via 68 | `from_map()` and `to_dataframe()`. For SQL support, we just use `dask-sql`. 69 | That's it! 70 | 71 | ## Why does this work? 72 | 73 | Underneath Xarray, Dask, and Pandas, there are NumPy arrays. These are paged in 74 | chunks and represented contiguously in memory. It is only a matter of metadata 75 | that breaks them up into ndarrays. `to_dataframe()` 76 | just changes this metadata (via a `ravel()`/`reshape()`), back into a column 77 | amenable to a DataFrame. 
78 | 79 | There is added overhead from duplicating dimensions as columns, which we see as 80 | worth the convenience of DataFrames. 81 | 82 | ## What are the current limitations? 83 | 84 | Dask doesn't support 85 | `MultiIndex`s ([dask/dask#1493](https://github.com/dask/dask/issues/1493)). If 86 | it did, I suspect performance for many types of queries would greatly improve. 87 | 88 | Further, while this does play well with `dask-geopandas` (for geospatial query 89 | support), certain types of operations don't quite match standard geopandas. 90 | Spatial joins come to mind as a killer feature, but only inner joins are 91 | supported ([geopandas/dask-geopandas#72](https://github.com/geopandas/dask-geopandas/issues/72)) 92 | . 93 | 94 | ## What would a deeper integration look like? 95 | 96 | I have a few ideas so far. One approach involves applying operations directly on 97 | Xarray Datasets. This approach is being pursued 98 | [here](https://github.com/google/weather-tools/tree/main/xql), as `xql`. 99 | 100 | Deeper still: I was thinking we could make 101 | a [virtual](https://fsspec.github.io/kerchunk/) 102 | filesystem for parquet that would internally map to Zarr. Raster-backed virtual 103 | parquet would open up integrations to numerous tools like dask, pyarrow, duckdb, 104 | and BigQuery. More thoughts on this 105 | in [#4](https://github.com/alxmrs/xarray-sql/issues/4). 106 | 107 | ## Sponsors & Contributors 108 | 109 | I want to give a special thanks to the following folks and institutions: 110 | 111 | - Pramod Gupta and the Anthromet Team at Google Research for the problem 112 | formation and design inspiration. 113 | - Jake Wall and AI2/Ecoscope for compute resources and key use cases. 114 | - Charles Stern, Stephan Hoyer, Alexander Kmoch, Wei Ji, and Qiusheng Wu 115 | for the early review and discussion of this project. 
116 | 117 | ## License 118 | 119 | ``` 120 | Copyright 2024 Alexander Merose 121 | 122 | Licensed under the Apache License, Version 2.0 (the "License"); 123 | you may not use this file except in compliance with the License. 124 | You may obtain a copy of the License at 125 | 126 | https://www.apache.org/licenses/LICENSE-2.0 127 | 128 | Unless required by applicable law or agreed to in writing, software 129 | distributed under the License is distributed on an "AS IS" BASIS, 130 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 131 | See the License for the specific language governing permissions and 132 | limitations under the License. 133 | ``` 134 | 135 | Some sources are re-distributed from Google LLC 136 | via https://github.com/google/Xee (also Apache-2.0 License) with and without 137 | modification (specifically, GitHub Actions workflows). These files are subject 138 | to the original copyright; they include the original license header comment as 139 | well as a note to indicate modifications (when appropriate). 140 | -------------------------------------------------------------------------------- /perf_tests/README.md: -------------------------------------------------------------------------------- 1 | # Performance testing & profiling 2 | 3 | So far, this includes statistical profiles via py-spy. 4 | 5 | ## Dev Process 6 | 7 | 1. Run a profile test with the `profile.sh` script like so: 8 | 9 | ```shell 10 | # PROFILE_CASE_PY=groupby_air.py 11 | sudo ./profile.sh $PROFILE_CASE_PY 12 | ``` 13 | 14 | This will open a flame graph in the browser. 15 | 16 | 2. After tuning code in xarray-sql, run another profile to generate an SVG. 17 | 18 | 3. Please commit the "after" profile SVG along with the performance improvements. 
19 | 20 | -------------------------------------------------------------------------------- /perf_tests/compute_air.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import xarray as xr 4 | import xarray_sql as qr 5 | 6 | if __name__ == '__main__': 7 | air = xr.tutorial.open_dataset('air_temperature') 8 | chunks = {'time': 240} 9 | air = air.chunk(chunks) 10 | 11 | df = qr.read_xarray(air).compute() 12 | 13 | print(len(df)) 14 | -------------------------------------------------------------------------------- /perf_tests/groupby_air.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import xarray as xr 4 | import xarray_sql as qr 5 | from dask_sql import Context 6 | 7 | 8 | if __name__ == '__main__': 9 | air = xr.tutorial.open_dataset('air_temperature') 10 | chunks = {'time': 240, 'lat': 5, 'lon': 7} 11 | air = air.chunk(chunks) 12 | air_small = air.isel( 13 | time=slice(0, 12), lat=slice(0, 11), lon=slice(0, 10) 14 | ).chunk(chunks) 15 | 16 | df = qr.read_xarray(air_small) 17 | 18 | c = Context() 19 | c.create_table('air', df) 20 | 21 | query = c.sql( 22 | """ 23 | SELECT 24 | "lat", "lon", SUM("air") as air_total 25 | FROM 26 | "air" 27 | GROUP BY 28 | "lat", "lon" 29 | """ 30 | ) 31 | 32 | result = query.compute() 33 | 34 | expected = air_small.dims['lat'] * air_small.dims['lon'] 35 | assert ( 36 | len(result) == expected 37 | ), f'Length must be {expected}, but was {len(result)}.' 
38 | print(expected) 39 | -------------------------------------------------------------------------------- /perf_tests/groupby_air_full.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import xarray as xr 4 | import xarray_sql as qr 5 | from dask_sql import Context 6 | 7 | 8 | if __name__ == '__main__': 9 | air = xr.tutorial.open_dataset('air_temperature') 10 | chunks = {'time': 240} 11 | air = air.chunk(chunks) 12 | 13 | df = qr.read_xarray(air) 14 | 15 | c = Context() 16 | c.create_table('air', df) 17 | 18 | query = c.sql( 19 | """ 20 | SELECT 21 | "lat", "lon", SUM("air") as air_total 22 | FROM 23 | "air" 24 | GROUP BY 25 | "lat", "lon" 26 | """ 27 | ) 28 | 29 | result = query.compute() 30 | 31 | expected = air.dims['lat'] * air.dims['lon'] 32 | assert ( 33 | len(result) == expected 34 | ), f'Length must be {expected}, but was {len(result)}.' 35 | print(expected) 36 | -------------------------------------------------------------------------------- /perf_tests/open_era5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import xarray as xr 4 | import xarray_sql as qr 5 | 6 | # Requires authenticating with GCP 7 | era5_ds = xr.open_zarr( 8 | 'gs://gcp-public-data-arco-era5/ar/1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2', 9 | chunks={'time': 240, 'level': 1}, 10 | ) 11 | era5_wind_df = qr.read_xarray( 12 | era5_ds[['u_component_of_wind', 'v_component_of_wind']] 13 | ) 14 | 15 | print(era5_wind_df.columns) 16 | -------------------------------------------------------------------------------- /perf_tests/open_era5.py-2024-02-18T17:33:02+07:00.svg: -------------------------------------------------------------------------------- 1 | py-spy record ./open_era5.py --function --threads Reset ZoomSearch <module> (qarray/core.py:1) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 
0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (7 samples, 0.21%)<module> (scipy/sparse/csgraph/_laplacian.py:1) (7 samples, 0.21%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (7 samples, 0.21%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (7 samples, 0.21%)<module> (scipy/sparse/linalg/__init__.py:1) (7 samples, 0.21%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (7 samples, 0.21%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (7 samples, 0.21%)<module> (scipy/sparse/linalg/_isolve/__init__.py:1) (7 samples, 0.21%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (7 samples, 0.21%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (7 samples, 0.21%)<module> (scipy/sparse/linalg/_isolve/lgmres.py:4) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 
0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)<module> (scipy/linalg/__init__.py:1) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)<module> (dask/array/chunk_types.py:1) (11 samples, 0.33%)_find_and_load (<frozen importlib._bootstrap>:1002) (11 samples, 0.33%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (11 samples, 0.33%)_load_unlocked (<frozen importlib._bootstrap>:659) (10 samples, 0.30%)exec_module (<frozen importlib._bootstrap_external>:844) (10 samples, 0.30%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (10 samples, 0.30%)<module> (scipy/sparse/__init__.py:1) (10 samples, 0.30%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (8 samples, 0.24%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (8 samples, 0.24%)_load_unlocked (<frozen importlib._bootstrap>:659) (8 samples, 0.24%)exec_module (<frozen importlib._bootstrap_external>:844) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)<module> (scipy/sparse/csgraph/__init__.py:1) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (8 samples, 0.24%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (8 samples, 0.24%)_load_unlocked (<frozen importlib._bootstrap>:659) (8 samples, 0.24%)<module> (dask/array/backends.py:1) (12 samples, 0.37%)_find_and_load (<frozen importlib._bootstrap>:1002) (12 samples, 0.37%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (12 samples, 
0.37%)_load_unlocked (<frozen importlib._bootstrap>:659) (12 samples, 0.37%)exec_module (<frozen importlib._bootstrap_external>:844) (12 samples, 0.37%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (12 samples, 0.37%)<module> (dask/array/core.py:1) (12 samples, 0.37%)_find_and_load (<frozen importlib._bootstrap>:1002) (12 samples, 0.37%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (12 samples, 0.37%)_load_unlocked (<frozen importlib._bootstrap>:659) (12 samples, 0.37%)exec_module (<frozen importlib._bootstrap_external>:844) (12 samples, 0.37%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (12 samples, 0.37%)<module> (dask/array/fft.py:1) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (22 samples, 0.67%)<module> (dask/array/ma.py:1) (6 samples, 0.18%)_find_and_load (<frozen importlib._bootstrap>:1002) (6 samples, 0.18%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (6 samples, 0.18%)_load_unlocked (<frozen importlib._bootstrap>:659) (6 samples, 0.18%)exec_module (<frozen importlib._bootstrap_external>:844) (6 samples, 0.18%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (6 samples, 0.18%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (23 samples, 0.70%)_find_and_load (<frozen importlib._bootstrap>:1002) (23 samples, 0.70%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (23 samples, 0.70%)_load_unlocked (<frozen importlib._bootstrap>:659) (23 samples, 0.70%)exec_module (<frozen importlib._bootstrap_external>:844) (23 samples, 0.70%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (23 samples, 0.70%)<module> (dask/array/__init__.py:1) 
(23 samples, 0.70%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (23 samples, 0.70%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (23 samples, 0.70%)_find_and_load (<frozen importlib._bootstrap>:1002) (23 samples, 0.70%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (23 samples, 0.70%)_load_unlocked (<frozen importlib._bootstrap>:659) (23 samples, 0.70%)exec_module (<frozen importlib._bootstrap_external>:844) (23 samples, 0.70%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)<module> (dask/bag/__init__.py:1) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)<module> (dask/dataframe/backends.py:1) (36 samples, 1.10%)_find_and_load (<frozen importlib._bootstrap>:1002) (36 samples, 1.10%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (36 samples, 1.10%)_load_unlocked (<frozen importlib._bootstrap>:659) (13 samples, 0.40%)exec_module (<frozen importlib._bootstrap_external>:844) (13 samples, 0.40%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (13 samples, 0.40%)<module> (dask/dataframe/core.py:1) (13 samples, 0.40%)<module> (qarray/__init__.py:1) (44 samples, 1.34%)_find_and_load (<frozen importlib._bootstrap>:1002) (44 samples, 1.34%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (44 samples, 1.34%)_load_unlocked (<frozen importlib._bootstrap>:659) (44 samples, 1.34%)exec_module (<frozen 
importlib._bootstrap_external>:844) (44 samples, 1.34%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (44 samples, 1.34%)<module> (qarray/df.py:1) (40 samples, 1.22%)_find_and_load (<frozen importlib._bootstrap>:1002) (40 samples, 1.22%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (40 samples, 1.22%)_load_unlocked (<frozen importlib._bootstrap>:659) (40 samples, 1.22%)exec_module (<frozen importlib._bootstrap_external>:844) (40 samples, 1.22%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (40 samples, 1.22%)<module> (dask/dataframe/__init__.py:1) (40 samples, 1.22%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (38 samples, 1.16%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (38 samples, 1.16%)_find_and_load (<frozen importlib._bootstrap>:1002) (38 samples, 1.16%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (38 samples, 1.16%)_load_unlocked (<frozen importlib._bootstrap>:659) (38 samples, 1.16%)exec_module (<frozen importlib._bootstrap_external>:844) (38 samples, 1.16%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (38 samples, 1.16%)<module> (numpy/__init__.py:1) (8 samples, 0.24%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)_find_and_load (<frozen importlib._bootstrap>:1002) (5 samples, 0.15%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (5 samples, 0.15%)_load_unlocked (<frozen importlib._bootstrap>:659) (5 samples, 0.15%)exec_module (<frozen importlib._bootstrap_external>:844) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)<module> (pandas/compat/__init__.py:1) (19 samples, 0.58%)_find_and_load (<frozen importlib._bootstrap>:1002) (19 samples, 0.58%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (19 samples, 0.58%)_load_unlocked (<frozen importlib._bootstrap>:659) (19 samples, 
0.58%)exec_module (<frozen importlib._bootstrap_external>:844) (19 samples, 0.58%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (18 samples, 0.55%)<module> (pandas/compat/pyarrow.py:1) (18 samples, 0.55%)_find_and_load (<frozen importlib._bootstrap>:1002) (18 samples, 0.55%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (18 samples, 0.55%)_load_unlocked (<frozen importlib._bootstrap>:659) (18 samples, 0.55%)exec_module (<frozen importlib._bootstrap_external>:844) (18 samples, 0.55%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (18 samples, 0.55%)<module> (pyarrow/__init__.py:20) (18 samples, 0.55%)_find_and_load (<frozen importlib._bootstrap>:1002) (18 samples, 0.55%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (18 samples, 0.55%)_load_unlocked (<frozen importlib._bootstrap>:659) (17 samples, 0.52%)module_from_spec (<frozen importlib._bootstrap>:558) (16 samples, 0.49%)create_module (<frozen importlib._bootstrap_external>:1171) (16 samples, 0.49%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (16 samples, 0.49%)exec_module (<frozen importlib._bootstrap_external>:1179) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:1179) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:1179) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 
0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)<module> (pandas/_libs/tslibs/__init__.py:1) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)<module> (pandas/_libs/__init__.py:1) (6 samples, 0.18%)_find_and_load (<frozen importlib._bootstrap>:1002) (6 samples, 0.18%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (6 samples, 0.18%)_load_unlocked (<frozen importlib._bootstrap>:659) (6 samples, 0.18%)_decorate_compute_function (pyarrow/compute.py:120) (4 samples, 0.12%)<module> (pandas/core/arrays/_arrow_string_mixins.py:1) (9 samples, 0.27%)_find_and_load (<frozen importlib._bootstrap>:1002) (9 samples, 0.27%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (9 samples, 0.27%)_load_unlocked (<frozen importlib._bootstrap>:659) (9 samples, 0.27%)exec_module (<frozen importlib._bootstrap_external>:844) (9 samples, 0.27%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)<module> (pyarrow/compute.py:18) (9 samples, 0.27%)_make_global_functions (pyarrow/compute.py:306) (5 samples, 0.15%)_wrap_function (pyarrow/compute.py:290) (5 samples, 0.15%)<module> (pandas/core/arrays/__init__.py:1) (13 samples, 0.40%)_find_and_load (<frozen importlib._bootstrap>:1002) (13 samples, 
0.40%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (13 samples, 0.40%)_load_unlocked (<frozen importlib._bootstrap>:659) (13 samples, 0.40%)exec_module (<frozen importlib._bootstrap_external>:844) (13 samples, 0.40%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (13 samples, 0.40%)<module> (pandas/core/arrays/arrow/__init__.py:1) (13 samples, 0.40%)_find_and_load (<frozen importlib._bootstrap>:1002) (13 samples, 0.40%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (13 samples, 0.40%)_load_unlocked (<frozen importlib._bootstrap>:659) (13 samples, 0.40%)exec_module (<frozen importlib._bootstrap_external>:844) (13 samples, 0.40%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (13 samples, 0.40%)<module> (pandas/core/arrays/arrow/array.py:1) (13 samples, 0.40%)_find_and_load (<frozen importlib._bootstrap>:1002) (13 samples, 0.40%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (13 samples, 0.40%)_load_unlocked (<frozen importlib._bootstrap>:659) (13 samples, 0.40%)exec_module (<frozen importlib._bootstrap_external>:844) (13 samples, 0.40%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (13 samples, 0.40%)<module> (pandas/core/methods/describe.py:1) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)get_code (<frozen importlib._bootstrap_external>:916) (4 samples, 0.12%)<module> (pandas/core/generic.py:2) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (7 samples, 0.21%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed 
(<frozen importlib._bootstrap>:220) (7 samples, 0.21%)_find_and_load (<frozen importlib._bootstrap>:1002) (9 samples, 0.27%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (9 samples, 0.27%)_load_unlocked (<frozen importlib._bootstrap>:659) (9 samples, 0.27%)exec_module (<frozen importlib._bootstrap_external>:844) (9 samples, 0.27%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)<module> (pandas/core/frame.py:1) (9 samples, 0.27%)_find_and_load (<frozen importlib._bootstrap>:1002) (9 samples, 0.27%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (9 samples, 0.27%)_load_unlocked (<frozen importlib._bootstrap>:659) (9 samples, 0.27%)exec_module (<frozen importlib._bootstrap_external>:844) (9 samples, 0.27%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)<module> (pandas/core/api.py:1) (31 samples, 0.94%)_find_and_load (<frozen importlib._bootstrap>:1002) (31 samples, 0.94%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (31 samples, 0.94%)_load_unlocked (<frozen importlib._bootstrap>:659) (31 samples, 0.94%)exec_module (<frozen importlib._bootstrap_external>:844) (31 samples, 0.94%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (31 samples, 0.94%)<module> (pandas/core/groupby/__init__.py:1) (10 samples, 0.30%)_find_and_load (<frozen importlib._bootstrap>:1002) (10 samples, 0.30%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (10 samples, 0.30%)_load_unlocked (<frozen importlib._bootstrap>:659) (10 samples, 0.30%)exec_module (<frozen importlib._bootstrap_external>:844) (10 samples, 0.30%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (10 samples, 0.30%)<module> (pandas/core/groupby/generic.py:1) (10 samples, 0.30%)_find_and_load (<frozen importlib._bootstrap>:1002) (55 samples, 1.67%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (55 samples, 1.67%)_load_unlocked (<frozen importlib._bootstrap>:659) (53 samples, 
1.61%)exec_module (<frozen importlib._bootstrap_external>:844) (53 samples, 1.61%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (53 samples, 1.61%)<module> (xarray/testing.py:1) (64 samples, 1.95%)<.._find_and_load (<frozen importlib._bootstrap>:1002) (64 samples, 1.95%)_.._find_and_load_unlocked (<frozen importlib._bootstrap>:967) (64 samples, 1.95%)_.._load_unlocked (<frozen importlib._bootstrap>:659) (64 samples, 1.95%)_..exec_module (<frozen importlib._bootstrap_external>:844) (64 samples, 1.95%)e.._call_with_frames_removed (<frozen importlib._bootstrap>:220) (64 samples, 1.95%)_..<module> (pandas/__init__.py:1) (56 samples, 1.70%)<module> (dask/base.py:1) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (6 samples, 0.18%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (6 samples, 0.18%)_load_unlocked (<frozen importlib._bootstrap>:659) (6 samples, 0.18%)exec_module (<frozen importlib._bootstrap_external>:844) (6 samples, 0.18%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (6 samples, 0.18%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)<module> (xarray/backends/file_manager.py:1) (9 samples, 0.27%)_find_and_load (<frozen importlib._bootstrap>:1002) (9 samples, 0.27%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (9 samples, 0.27%)_load_unlocked (<frozen importlib._bootstrap>:659) (9 samples, 0.27%)exec_module (<frozen importlib._bootstrap_external>:844) (9 samples, 0.27%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)<module> (xarray/backends/locks.py:1) (9 samples, 0.27%)_find_and_load (<frozen importlib._bootstrap>:1002) (9 samples, 0.27%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (9 samples, 0.27%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)_find_and_load (<frozen importlib._bootstrap>:1002) (9 samples, 0.27%)_find_and_load_unlocked (<frozen 
importlib._bootstrap>:967) (9 samples, 0.27%)_load_unlocked (<frozen importlib._bootstrap>:659) (9 samples, 0.27%)exec_module (<frozen importlib._bootstrap_external>:844) (9 samples, 0.27%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)<module> (dask/__init__.py:1) (9 samples, 0.27%)exec_module (<frozen importlib._bootstrap_external>:844) (10 samples, 0.30%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (77 samples, 2.34%)_.._call_with_frames_removed (<frozen importlib._bootstrap>:220) (77 samples, 2.34%)_.._find_and_load (<frozen importlib._bootstrap>:1002) (77 samples, 2.34%)_.._find_and_load_unlocked (<frozen importlib._bootstrap>:967) (77 samples, 2.34%)_.._load_unlocked (<frozen importlib._bootstrap>:659) (77 samples, 2.34%)_..exec_module (<frozen importlib._bootstrap_external>:844) (77 samples, 2.34%)e.._call_with_frames_removed (<frozen importlib._bootstrap>:220) (77 samples, 2.34%)_..<module> (xarray/tutorial.py:1) (13 samples, 0.40%)_find_and_load (<frozen importlib._bootstrap>:1002) (13 samples, 0.40%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (13 samples, 0.40%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (13 samples, 0.40%)_find_and_load (<frozen importlib._bootstrap>:1002) (13 samples, 0.40%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (13 samples, 0.40%)_load_unlocked (<frozen importlib._bootstrap>:659) (11 samples, 0.33%)exec_module (<frozen importlib._bootstrap_external>:844) (11 samples, 0.33%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (11 samples, 0.33%)<module> (xarray/backends/__init__.py:1) (11 samples, 0.33%)_find_and_load (<frozen importlib._bootstrap>:1002) (11 samples, 0.33%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (11 samples, 0.33%)_load_unlocked (<frozen importlib._bootstrap>:659) (11 samples, 0.33%)_find_and_load (<frozen importlib._bootstrap>:1002) (122 samples, 3.71%)_fin.._find_and_load_unlocked (<frozen 
importlib._bootstrap>:967) (122 samples, 3.71%)_fin.._load_unlocked (<frozen importlib._bootstrap>:659) (122 samples, 3.71%)_loa..exec_module (<frozen importlib._bootstrap_external>:844) (122 samples, 3.71%)exec.._call_with_frames_removed (<frozen importlib._bootstrap>:220) (122 samples, 3.71%)_cal..<module> (xarray/__init__.py:1) (78 samples, 2.37%)<m.._get_chunk (xarray/core/dataset.py:221) (25 samples, 0.76%)_dataset_from_backend_dataset (xarray/backends/api.py:350) (33 samples, 1.00%)_chunk_ds (xarray/backends/api.py:308) (33 samples, 1.00%)_maybe_chunk (xarray/core/dataset.py:279) (8 samples, 0.24%)tokenize (dask/base.py:1026) (7 samples, 0.21%)__call__ (dask/utils.py:762) (7 samples, 0.21%)normalize_dict (dask/base.py:1064) (7 samples, 0.21%)__call__ (dask/utils.py:762) (7 samples, 0.21%)normalize_seq (dask/base.py:1095) (7 samples, 0.21%)_normalize_seq_func (dask/base.py:1079) (7 samples, 0.21%)__call__ (dask/utils.py:762) (7 samples, 0.21%)normalize_seq (dask/base.py:1095) (7 samples, 0.21%)_normalize_seq_func (dask/base.py:1079) (7 samples, 0.21%)__call__ (dask/utils.py:762) (7 samples, 0.21%)normalize_seq (dask/base.py:1095) (7 samples, 0.21%)_normalize_seq_func (dask/base.py:1079) (7 samples, 0.21%)__call__ (dask/utils.py:762) (5 samples, 0.15%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)<module> (requests/__init__.py:6) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 
0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)<module> (ee/__init__.py:1) (15 samples, 0.46%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (15 samples, 0.46%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (15 samples, 0.46%)_find_and_load (<frozen importlib._bootstrap>:1002) (15 samples, 0.46%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (15 samples, 0.46%)_load_unlocked (<frozen importlib._bootstrap>:659) (15 samples, 0.46%)exec_module (<frozen importlib._bootstrap_external>:844) (15 samples, 0.46%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (15 samples, 0.46%)<module> (ee/batch.py:1) (15 samples, 0.46%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (15 samples, 0.46%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (15 samples, 0.46%)_find_and_load (<frozen importlib._bootstrap>:1002) (15 samples, 0.46%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (15 samples, 0.46%)_load_unlocked (<frozen importlib._bootstrap>:659) (15 samples, 0.46%)exec_module (<frozen importlib._bootstrap_external>:844) (15 samples, 0.46%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (15 samples, 0.46%)<module> (ee/_cloud_api_utils.py:1) (15 samples, 0.46%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (11 samples, 0.33%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (11 samples, 0.33%)_find_and_load (<frozen importlib._bootstrap>:1002) (11 samples, 0.33%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (11 samples, 0.33%)_load_unlocked (<frozen importlib._bootstrap>:659) (11 samples, 0.33%)exec_module (<frozen importlib._bootstrap_external>:844) (11 samples, 0.33%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (11 samples, 0.33%)<module> (googleapiclient/discovery.py:15) (11 samples, 0.33%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (8 samples, 0.24%)_call_with_frames_removed (<frozen 
importlib._bootstrap>:220) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (8 samples, 0.24%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (8 samples, 0.24%)_load_unlocked (<frozen importlib._bootstrap>:659) (8 samples, 0.24%)exec_module (<frozen importlib._bootstrap_external>:844) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)<module> (oauth2/service_account.py:15) (8 samples, 0.24%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (8 samples, 0.24%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (8 samples, 0.24%)_load_unlocked (<frozen importlib._bootstrap>:659) (8 samples, 0.24%)exec_module (<frozen importlib._bootstrap_external>:844) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)<module> (auth/_service_account_info.py:15) (8 samples, 0.24%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (8 samples, 0.24%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (8 samples, 0.24%)_load_unlocked (<frozen importlib._bootstrap>:659) (8 samples, 0.24%)exec_module (<frozen importlib._bootstrap_external>:844) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)<module> (auth/crypt/__init__.py:15) (8 samples, 0.24%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (8 samples, 0.24%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (8 samples, 0.24%)_load_unlocked (<frozen importlib._bootstrap>:659) (8 
samples, 0.24%)exec_module (<frozen importlib._bootstrap_external>:844) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)<module> (auth/crypt/rsa.py:15) (8 samples, 0.24%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (8 samples, 0.24%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (8 samples, 0.24%)_load_unlocked (<frozen importlib._bootstrap>:659) (8 samples, 0.24%)exec_module (<frozen importlib._bootstrap_external>:844) (8 samples, 0.24%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (8 samples, 0.24%)<module> (auth/crypt/_python_rsa.py:15) (8 samples, 0.24%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)_find_and_load (<frozen importlib._bootstrap>:1002) (5 samples, 0.15%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (5 samples, 0.15%)_load_unlocked (<frozen importlib._bootstrap>:659) (5 samples, 0.15%)exec_module (<frozen importlib._bootstrap_external>:844) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)<module> (pyasn1/codec/der/decoder.py:7) (5 samples, 0.15%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)_find_and_load (<frozen importlib._bootstrap>:1002) (5 samples, 0.15%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (5 samples, 0.15%)_load_unlocked (<frozen importlib._bootstrap>:659) (5 samples, 0.15%)exec_module (<frozen importlib._bootstrap_external>:844) (5 samples, 0.15%)get_backend (xarray/backends/plugins.py:200) (21 samples, 0.64%)list_engines (xarray/backends/plugins.py:119) (21 samples, 0.64%)build_engines 
(xarray/backends/plugins.py:106) (21 samples, 0.64%)backends_dict_from_pkg (xarray/backends/plugins.py:70) (21 samples, 0.64%)load (importlib_metadata/__init__.py:178) (21 samples, 0.64%)import_module (importlib/__init__.py:109) (21 samples, 0.64%)_gcd_import (<frozen importlib._bootstrap>:1018) (21 samples, 0.64%)_find_and_load (<frozen importlib._bootstrap>:1002) (21 samples, 0.64%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (21 samples, 0.64%)_load_unlocked (<frozen importlib._bootstrap>:659) (21 samples, 0.64%)exec_module (<frozen importlib._bootstrap_external>:844) (21 samples, 0.64%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (21 samples, 0.64%)<module> (xee/__init__.py:15) (21 samples, 0.64%)_find_and_load (<frozen importlib._bootstrap>:1002) (21 samples, 0.64%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (21 samples, 0.64%)_load_unlocked (<frozen importlib._bootstrap>:659) (21 samples, 0.64%)exec_module (<frozen importlib._bootstrap_external>:844) (21 samples, 0.64%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (21 samples, 0.64%)<module> (xee/ext.py:15) (21 samples, 0.64%)_find_and_load (<frozen importlib._bootstrap>:1002) (21 samples, 0.64%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (21 samples, 0.64%)_load_unlocked (<frozen importlib._bootstrap>:659) (21 samples, 0.64%)exec_module (<frozen importlib._bootstrap_external>:844) (21 samples, 0.64%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (21 samples, 0.64%)<module> (pyproj/__init__.py:1) (6 samples, 0.18%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)_call_with_frames_removed (<frozen 
importlib._bootstrap>:220) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (5 samples, 0.15%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (7 samples, 0.21%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (7 samples, 0.21%)<module> (zarr/__init__.py:2) (7 samples, 0.21%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (6 samples, 0.18%)_find_and_load (<frozen importlib._bootstrap>:1002) (6 samples, 0.18%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (6 samples, 0.18%)_load_unlocked (<frozen importlib._bootstrap>:659) (6 samples, 0.18%)exec_module (<frozen importlib._bootstrap_external>:844) (6 samples, 0.18%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (6 samples, 0.18%)<module> (fsspec/implementations/http.py:1) (7 samples, 0.21%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (7 samples, 0.21%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (7 samples, 0.21%)<module> (aiohttp/__init__.py:1) (7 samples, 0.21%)_find_and_load (<frozen importlib._bootstrap>:1002) (7 samples, 0.21%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (7 samples, 0.21%)_load_unlocked (<frozen importlib._bootstrap>:659) (7 samples, 0.21%)exec_module (<frozen importlib._bootstrap_external>:844) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (7 samples, 0.21%)<module> 
(aiohttp/client.py:1) (7 samples, 0.21%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (15 samples, 0.46%)<module> (gcsfs/core.py:1) (15 samples, 0.46%)_find_and_load (<frozen importlib._bootstrap>:1002) (14 samples, 0.43%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (14 samples, 0.43%)_load_unlocked (<frozen importlib._bootstrap>:659) (12 samples, 0.37%)exec_module (<frozen importlib._bootstrap_external>:844) (12 samples, 0.37%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (12 samples, 0.37%)_un_chain (fsspec/core.py:319) (18 samples, 0.55%)get_filesystem_class (fsspec/registry.py:216) (18 samples, 0.55%)_import_class (fsspec/registry.py:254) (18 samples, 0.55%)import_module (importlib/__init__.py:109) (18 samples, 0.55%)_gcd_import (<frozen importlib._bootstrap>:1018) (18 samples, 0.55%)_find_and_load (<frozen importlib._bootstrap>:1002) (18 samples, 0.55%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (18 samples, 0.55%)_load_unlocked (<frozen importlib._bootstrap>:659) (18 samples, 0.55%)exec_module (<frozen importlib._bootstrap_external>:844) (18 samples, 0.55%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (18 samples, 0.55%)<module> (gcsfs/__init__.py:1) (18 samples, 0.55%)_find_and_load (<frozen importlib._bootstrap>:1002) (18 samples, 0.55%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (18 samples, 0.55%)_load_unlocked (<frozen importlib._bootstrap>:659) (18 samples, 0.55%)exec_module (<frozen importlib._bootstrap_external>:844) (18 samples, 0.55%)open_zarr (xarray/backends/zarr.py:740) (83 samples, 2.53%)op..open_dataset (xarray/backends/api.py:392) (83 samples, 2.53%)op..open_dataset (xarray/backends/zarr.py:944) (29 samples, 0.88%)open_group (xarray/backends/zarr.py:374) (26 samples, 0.79%)open_consolidated (zarr/convenience.py:1281) (19 samples, 0.58%)normalize_store_arg (zarr/storage.py:184) (19 samples, 0.58%)_normalize_store_arg_v2 (zarr/storage.py:143) (19 
samples, 0.58%)__init__ (zarr/storage.py:1344) (19 samples, 0.58%)get_mapper (fsspec/mapping.py:206) (19 samples, 0.58%)url_to_fs (fsspec/core.py:350) (19 samples, 0.58%)<listcomp> (qarray/df.py:67) (6 samples, 0.18%)<listcomp> (qarray/df.py:73) (59 samples, 1.79%)<.._block_len (qarray/df.py:66) (58 samples, 1.76%)prod (numpy/core/fromnumeric.py:2979) (44 samples, 1.34%)_wrapreduction (numpy/core/fromnumeric.py:71) (41 samples, 1.25%)<dictcomp> (qarray/df.py:37) (53 samples, 1.61%)_get_chunk_slicer (qarray/df.py:14) (51 samples, 1.55%)<genexpr> (qarray/df.py:33) (6 samples, 0.18%)block_slices (qarray/df.py:24) (65 samples, 1.98%)b..<genexpr> (qarray/df.py:36) (64 samples, 1.95%)<..collections_to_dsk (dask/base.py:417) (9 samples, 0.27%)optimize (dask/array/optimization.py:27) (9 samples, 0.27%)cull (dask/highlevelgraph.py:706) (9 samples, 0.27%)get_output_keys (dask/blockwise.py:478) (8 samples, 0.24%)<setcomp> (dask/blockwise.py:484) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (5 samples, 0.15%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (5 samples, 0.15%)_load_unlocked (<frozen importlib._bootstrap>:659) (5 samples, 0.15%)exec_module (<frozen importlib._bootstrap_external>:844) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)<module> (distributed/comm/__init__.py:1) (5 samples, 0.15%)_register_transports (distributed/comm/__init__.py:19) (5 samples, 0.15%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)_find_and_load (<frozen importlib._bootstrap>:1002) (5 samples, 0.15%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (5 samples, 0.15%)_load_unlocked (<frozen importlib._bootstrap>:659) (5 samples, 0.15%)exec_module (<frozen importlib._bootstrap_external>:844) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 
0.15%)<module> (distributed/core.py:1) (8 samples, 0.24%)_find_and_load (<frozen importlib._bootstrap>:1002) (9 samples, 0.27%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (9 samples, 0.27%)_load_unlocked (<frozen importlib._bootstrap>:659) (9 samples, 0.27%)exec_module (<frozen importlib._bootstrap_external>:844) (9 samples, 0.27%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (9 samples, 0.27%)<module> (distributed/actor.py:1) (10 samples, 0.30%)_find_and_load (<frozen importlib._bootstrap>:1002) (10 samples, 0.30%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (10 samples, 0.30%)_load_unlocked (<frozen importlib._bootstrap>:659) (10 samples, 0.30%)exec_module (<frozen importlib._bootstrap_external>:844) (10 samples, 0.30%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (10 samples, 0.30%)<module> (distributed/client.py:1) (10 samples, 0.30%)_find_and_load (<frozen importlib._bootstrap>:1002) (18 samples, 0.55%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (18 samples, 0.55%)_load_unlocked (<frozen importlib._bootstrap>:659) (15 samples, 0.46%)exec_module (<frozen importlib._bootstrap_external>:844) (15 samples, 0.46%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (15 samples, 0.46%)<module> (distributed/deploy/__init__.py:1) (5 samples, 0.15%)_find_and_load (<frozen importlib._bootstrap>:1002) (5 samples, 0.15%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (5 samples, 0.15%)_load_unlocked (<frozen importlib._bootstrap>:659) (5 samples, 0.15%)exec_module (<frozen importlib._bootstrap_external>:844) (5 samples, 0.15%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (5 samples, 0.15%)thread (0x2035FF240) (362 samples, 11.01%)thread (0x2035FF..<module> (open_era5.py:3) (362 samples, 11.01%)<module> (open_e..to_dd (qarray/df.py:70) (157 samples, 4.78%)to_dd ..from_map (dask/dataframe/io/io.py:849) (31 samples, 0.94%)_emulate (dask/dataframe/core.py:7167) 
(31 samples, 0.94%)f (qarray/df.py:76) (31 samples, 0.94%)to_pd (qarray/df.py:54) (31 samples, 0.94%)unbounded_unravel (qarray/core.py:24) (31 samples, 0.94%)values (xarray/core/dataarray.py:750) (31 samples, 0.94%)values (xarray/core/variable.py:613) (31 samples, 0.94%)_as_array_or_item (xarray/core/variable.py:295) (31 samples, 0.94%)__array__ (dask/array/core.py:1699) (31 samples, 0.94%)compute (dask/base.py:355) (31 samples, 0.94%)compute (dask/base.py:603) (31 samples, 0.94%)get_scheduler (dask/base.py:1449) (22 samples, 0.67%)_find_and_load (<frozen importlib._bootstrap>:1002) (22 samples, 0.67%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (22 samples, 0.67%)_load_unlocked (<frozen importlib._bootstrap>:659) (22 samples, 0.67%)exec_module (<frozen importlib._bootstrap_external>:844) (22 samples, 0.67%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (22 samples, 0.67%)<module> (distributed/__init__.py:1) (22 samples, 0.67%)_handle_fromlist (<frozen importlib._bootstrap>:1033) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)_find_and_load (<frozen importlib._bootstrap>:1002) (4 samples, 0.12%)_find_and_load_unlocked (<frozen importlib._bootstrap>:967) (4 samples, 0.12%)_load_unlocked (<frozen importlib._bootstrap>:659) (4 samples, 0.12%)exec_module (<frozen importlib._bootstrap_external>:844) (4 samples, 0.12%)_call_with_frames_removed (<frozen importlib._bootstrap>:220) (4 samples, 0.12%)<module> (distributed/config.py:1) (4 samples, 0.12%)_process_events (asyncio/selector_events.py:592) (60 samples, 1.83%)_.._add_callback (asyncio/base_events.py:1812) (17 samples, 0.52%)_reschedule_timeout (aiohttp/client_proto.py:180) (27 samples, 0.82%)_call_soon (asyncio/base_events.py:770) (40 samples, 1.22%)__init__ (asyncio/events.py:31) (13 samples, 0.40%)get_debug (asyncio/base_events.py:1923) (4 samples, 0.12%)data_received (aiohttp/client_proto.py:201) (353 samples, 10.74%)data_received 
(a..feed_data (aiohttp/streams.py:232) (167 samples, 5.08%)feed_d..set_result (aiohttp/helpers.py:808) (81 samples, 2.46%)se..call_soon (asyncio/base_events.py:741) (54 samples, 1.64%)_check_closed (asyncio/base_events.py:513) (6 samples, 0.18%)do_handshake (ssl.py:943) (16 samples, 0.49%)_read_ready (asyncio/selector_events.py:810) (2,079 samples, 63.25%)_read_ready (asyncio/selector_events.py:810)_read_ready__data_received (asyncio/selector_events.py:850) (2,050 samples, 62.37%)_read_ready__data_received (asyncio/selector_events.py:850)data_received (asyncio/sslproto.py:524) (1,352 samples, 41.13%)data_received (asyncio/sslproto.py:524)feed_ssldata (asyncio/sslproto.py:156) (933 samples, 28.38%)feed_ssldata (asyncio/sslproto.py:156)read (ssl.py:880) (698 samples, 21.24%)read (ssl.py:880)feed_ssldata (asyncio/sslproto.py:156) (5 samples, 0.15%)do_handshake (ssl.py:943) (5 samples, 0.15%)connection_made (asyncio/sslproto.py:477) (6 samples, 0.18%)_start_handshake (asyncio/sslproto.py:606) (6 samples, 0.18%)_process_write_backlog (asyncio/sslproto.py:671) (6 samples, 0.18%)do_handshake (asyncio/sslproto.py:105) (6 samples, 0.18%)__aenter__ (aiohttp/client.py:1193) (8 samples, 0.24%)_request (aiohttp/client.py:383) (8 samples, 0.24%)_get_headers (gcsfs/core.py:394) (5 samples, 0.15%)apply (gcsfs/credentials.py:185) (5 samples, 0.15%)maybe_refresh (gcsfs/credentials.py:170) (5 samples, 0.15%)refresh (oauth2/credentials.py:375) (5 samples, 0.15%)refresh_grant (oauth2/reauth.py:281) (5 samples, 0.15%)_token_endpoint_request_no_throw (oauth2/_client.py:139) (5 samples, 0.15%)_perform_request (oauth2/_client.py:190) (5 samples, 0.15%)__call__ (auth/transport/requests.py:155) (5 samples, 0.15%)request (requests/sessions.py:502) (5 samples, 0.15%)send (requests/sessions.py:673) (4 samples, 0.12%)send (requests/adapters.py:434) (4 samples, 0.12%)urlopen (urllib3/connectionpool.py:595) (4 samples, 0.12%)_make_request (urllib3/connectionpool.py:380) (4 samples, 
0.12%)_validate_conn (urllib3/connectionpool.py:1084) (4 samples, 0.12%)connect (urllib3/connection.py:609) (4 samples, 0.12%)_ssl_wrap_socket_and_match_hostname (urllib3/connection.py:708) (4 samples, 0.12%)_read_nowait_chunk (aiohttp/streams.py:463) (141 samples, 4.29%)_read.._read_nowait (aiohttp/streams.py:490) (262 samples, 7.97%)_read_nowai..assert_timeout (aiohttp/helpers.py:681) (7 samples, 0.21%)_call (gcsfs/core.py:433) (563 samples, 17.13%)_call (gcsfs/core.py:433)fun (decorator.py:218) (554 samples, 16.85%)fun (decorator.py:218)retry_request (gcsfs/retry.py:117) (539 samples, 16.40%)retry_request (gcsfs/retr.._request (gcsfs/core.py:411) (532 samples, 16.18%)_request (gcsfs/core.py:4..read (aiohttp/client_reqrep.py:1097) (512 samples, 15.58%)read (aiohttp/client_req..read (aiohttp/streams.py:345) (503 samples, 15.30%)read (aiohttp/streams.p..readany (aiohttp/streams.py:387) (439 samples, 13.36%)readany (aiohttp/str.._wait (aiohttp/streams.py:288) (104 samples, 3.16%)_wa..create_future (asyncio/base_events.py:427) (29 samples, 0.88%)get_debug (asyncio/base_events.py:1923) (4 samples, 0.12%)wait_for (asyncio/tasks.py:421) (578 samples, 17.58%)wait_for (asyncio/tasks.py:.._cat_file (gcsfs/core.py:1020) (569 samples, 17.31%)_cat_file (gcsfs/core.py:10.._run (asyncio/events.py:78) (2,752 samples, 83.72%)_run (asyncio/events.py:78)select (selectors.py:554) (9 samples, 0.27%)_key_from_fd (selectors.py:276) (9 samples, 0.27%)thread (0x3084A0000) (2,924 samples, 88.96%)thread (0x3084A0000)_bootstrap (threading.py:923) (2,924 samples, 88.96%)_bootstrap (threading.py:923)_bootstrap_inner (threading.py:963) (2,924 samples, 88.96%)_bootstrap_inner (threading.py:963)run (threading.py:906) (2,924 samples, 88.96%)run (threading.py:906)run_forever (asyncio/base_events.py:588) (2,924 samples, 88.96%)run_forever (asyncio/base_events.py:588)_run_once (asyncio/base_events.py:1830) (2,920 samples, 88.83%)_run_once (asyncio/base_events.py:1830)time 
(asyncio/base_events.py:694) (5 samples, 0.15%)all (3,287 samples, 100%) -------------------------------------------------------------------------------- /perf_tests/profile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | py-spy record ./$1 --function --threads -------------------------------------------------------------------------------- /perf_tests/sanity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import xarray as xr 4 | import xarray_sql as qr 5 | 6 | if __name__ == '__main__': 7 | air = xr.tutorial.open_dataset('air_temperature') 8 | chunks = {'time': 240, 'lat': 5, 'lon': 7} 9 | 10 | air_small = air.isel( 11 | time=slice(0, 12), lat=slice(0, 11), lon=slice(0, 10) 12 | ).chunk(chunks) 13 | 14 | df = qr.read_xarray(air_small).compute() 15 | 16 | print(len(df)) 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "xarray_sql" 3 | dynamic = ["version"] 4 | description = "Query Xarray with SQL." 
5 | readme = "README.md" 6 | requires-python = ">=3.8" 7 | license = {text = "Apache-2.0"} 8 | authors = [ 9 | {name = "Alexander Merose", email = "al@merose.com"}, 10 | ] 11 | classifiers = [ 12 | "Development Status :: 4 - Beta", 13 | "Intended Audience :: Science/Research", 14 | "Intended Audience :: Developers", 15 | "Intended Audience :: Information Technology", 16 | "License :: OSI Approved :: Apache Software License", 17 | "Operating System :: MacOS :: MacOS X", 18 | "Operating System :: Microsoft :: Windows", 19 | "Operating System :: POSIX", 20 | "Programming Language :: Python :: 3.8", 21 | "Programming Language :: Python :: 3.9", 22 | "Programming Language :: Python :: 3.10", 23 | "Programming Language :: Python :: 3.11", 24 | "Programming Language :: Python :: 3.12", 25 | "Topic :: Scientific/Engineering :: Atmospheric Science", 26 | "Topic :: Database :: Front-Ends", 27 | ] 28 | dependencies = [ 29 | "xarray", 30 | "dask-sql", 31 | ] 32 | 33 | [project.optional-dependencies] 34 | test = [ 35 | "pytest", 36 | "xarray[io]", 37 | "gcsfs", 38 | ] 39 | dev = [ 40 | "xarray_sql[test]", 41 | "pyink", 42 | "py-spy" 43 | ] 44 | 45 | [project.urls] 46 | Homepage = "https://github.com/alxmrs/xarray-sql" 47 | Issues = "https://github.com/alxmrs/xarray-sql/issues" 48 | 49 | [build-system] 50 | requires = ["setuptools>=64", "setuptools_scm>=8"] 51 | build-backend = "setuptools.build_meta" 52 | 53 | [tool.setuptools.packages.find] 54 | exclude = ["demo", "perf_tests"] 55 | 56 | [tool.pyink] 57 | line-length = 80 58 | preview = true 59 | pyink-indentation = 2 60 | pyink-use-majority-quotes = true 61 | 62 | [tool.setuptools_scm] -------------------------------------------------------------------------------- /xarray_sql/__init__.py: -------------------------------------------------------------------------------- 1 | from .df import read_xarray 2 | from .sql import Context 3 | -------------------------------------------------------------------------------- 
/xarray_sql/core.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import typing as t 3 | 4 | import numpy as np 5 | import xarray as xr 6 | 7 | Row = t.List[t.Any] 8 | 9 | 10 | # deprecated 11 | def get_columns(ds: xr.Dataset) -> t.List[str]: 12 | return list(ds.dims.keys()) + list(ds.data_vars.keys()) 13 | 14 | 15 | # Deprecated 16 | def unravel(ds: xr.Dataset) -> t.Iterator[Row]: 17 | dim_keys, dim_vals = zip(*ds.dims.items()) 18 | 19 | for idx in itertools.product(*(range(d) for d in dim_vals)): 20 | coord_idx = dict(zip(dim_keys, idx)) 21 | data = ds.isel(coord_idx) 22 | coord_data = [ds.coords[v][coord_idx[v]] for v in dim_keys] 23 | row = [v.values for v in coord_data + list(data.data_vars.values())] 24 | yield row 25 | 26 | 27 | # Deprecated 28 | def unbounded_unravel(ds: xr.Dataset) -> np.ndarray: 29 | """Unravel with unbounded memory (as a NumPy Array).""" 30 | dim_keys, dim_vals = zip(*ds.dims.items()) 31 | columns = get_columns(ds) 32 | 33 | N = np.prod([d for d in dim_vals]) 34 | 35 | out = np.recarray((N,), dtype=[(c, ds[c].dtype) for c in columns]) 36 | 37 | for name, da in ds.items(): 38 | out[name] = da.values.ravel() 39 | 40 | prod_vals = (ds.coords[k].values for k in dim_keys) 41 | coords = np.array(np.meshgrid(*prod_vals), dtype=int).T.reshape( 42 | -1, len(dim_keys) 43 | ) 44 | 45 | for i, d in enumerate(dim_keys): 46 | out[d] = coords[:, i] 47 | 48 | return out 49 | -------------------------------------------------------------------------------- /xarray_sql/df.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import typing as t 3 | 4 | import dask 5 | import dask.dataframe as dd 6 | import numpy as np 7 | import pandas as pd 8 | import xarray as xr 9 | from dask.dataframe.io import from_map 10 | 11 | from . 
import core 12 | 13 | Block = t.Dict[str, slice] 14 | Chunks = t.Optional[t.Dict[str, int]] 15 | 16 | # Turn on Dask-Expr 17 | dask.config.set({'dataframe.query-planning-warning': False}) 18 | dask.config.set({'dataframe.query-planning': True}) 19 | # Turn on Copy-On-Write (needs Pandas 2.0). 20 | pd.options.mode.copy_on_write = True 21 | 22 | 23 | # Borrowed from Xarray 24 | def _get_chunk_slicer( 25 | dim: t.Hashable, chunk_index: t.Mapping, chunk_bounds: t.Mapping 26 | ): 27 | if dim in chunk_index: 28 | which_chunk = chunk_index[dim] 29 | return slice( 30 | chunk_bounds[dim][which_chunk], chunk_bounds[dim][which_chunk + 1] 31 | ) 32 | return slice(None) 33 | 34 | 35 | # Adapted from Xarray `map_blocks` implementation. 36 | def block_slices(ds: xr.Dataset, chunks: Chunks = None) -> t.Iterator[Block]: 37 | """Compute block slices for a chunked Dataset.""" 38 | if chunks is not None: 39 | for_chunking = ds.copy(data=None, deep=False).chunk(chunks) 40 | chunks = for_chunking.chunks 41 | del for_chunking 42 | else: 43 | chunks = ds.chunks 44 | 45 | assert chunks, 'Dataset `ds` must be chunked or `chunks` must be provided.' 46 | 47 | chunk_bounds = {dim: np.cumsum((0,) + c) for dim, c in chunks.items()} 48 | ichunk = {dim: range(len(c)) for dim, c in chunks.items()} 49 | ick, icv = zip(*ichunk.items()) # Makes same order of keys and val. 
50 | chunk_idxs = (dict(zip(ick, i)) for i in itertools.product(*icv)) 51 | blocks = ( 52 | { 53 | dim: _get_chunk_slicer(dim, chunk_index, chunk_bounds) 54 | for dim in ds.dims 55 | } 56 | for chunk_index in chunk_idxs 57 | ) 58 | yield from blocks 59 | 60 | 61 | def explode(ds: xr.Dataset, chunks: Chunks = None) -> t.Iterator[xr.Dataset]: 62 | """Explodes a dataset into its chunks.""" 63 | yield from (ds.isel(b) for b in block_slices(ds, chunks=chunks)) 64 | 65 | 66 | def _block_len(block: Block) -> int: 67 | return np.prod([v.stop - v.start for v in block.values()]) 68 | 69 | 70 | def read_xarray(ds: xr.Dataset, chunks: Chunks = None) -> dd.DataFrame: 71 | """Pivots an Xarray Dataset into a Dask Dataframe, partitioned by chunks. 72 | 73 | Args: 74 | ds: An Xarray Dataset. All `data_vars` must share the same dimensions. 75 | chunks: Xarray-like chunks. If not provided, will default to the Dataset's 76 | chunks. The product of the chunk sizes becomes the standard length of each 77 | dataframe partition. 78 | 79 | Returns: 80 | A Dask Dataframe, which is a table representation of the input Dataset. 81 | """ 82 | fst = next(iter(ds.values())).dims 83 | assert all( 84 | da.dims == fst for da in ds.values() 85 | ), 'All dimensions must be equal. Please filter data_vars in the Dataset.' 86 | 87 | blocks = list(block_slices(ds, chunks)) 88 | 89 | block_lengths = [_block_len(b) for b in blocks] 90 | divisions = tuple(np.cumsum([0] + block_lengths)) # 0 ==> start partition. 91 | 92 | def pivot(b: Block) -> pd.DataFrame: 93 | return ds.isel(b).to_dataframe().reset_index() 94 | 95 | # Token is needed to prevent Dask from spending too many cycles calculating 96 | # its own token from the constituent parts. 97 | token = ( 98 | 'xarray-Dataset-' 99 | f'{"_".join(list(ds.dims.keys()))}' 100 | '__' 101 | f'{"_".join(list(ds.data_vars.keys()))}' 102 | ) 103 | 104 | columns = pivot(blocks[0]).columns 105 | 106 | # TODO(#18): Is it possible to pass the length (known now) here? 
107 | meta = {c: ds[c].dtype for c in columns} 108 | 109 | return from_map( 110 | pivot, 111 | blocks, 112 | meta=meta, 113 | divisions=divisions, 114 | token=token, 115 | ) 116 | -------------------------------------------------------------------------------- /xarray_sql/df_integrationtest.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import xarray as xr 4 | 5 | from . import read_xarray 6 | 7 | 8 | class Era5TestCast(unittest.TestCase): 9 | 10 | def test_open_era5(self): 11 | era5_ds = xr.open_zarr( 12 | 'gs://gcp-public-data-arco-era5/ar/1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2', 13 | chunks={'time': 240, 'level': 1}, 14 | ) 15 | era5_wind_df = read_xarray( 16 | era5_ds[['u_component_of_wind', 'v_component_of_wind']] 17 | ) 18 | 19 | self.assertEqual( 20 | list(era5_wind_df.columns), 21 | [ 22 | 'time', 23 | 'level', 24 | 'latitude', 25 | 'longitude', 26 | 'u_component_of_wind', 27 | 'v_component_of_wind', 28 | ], 29 | ) 30 | -------------------------------------------------------------------------------- /xarray_sql/df_test.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import unittest 3 | 4 | import dask.dataframe as dd 5 | import numpy as np 6 | import pandas as pd 7 | import xarray as xr 8 | 9 | from .df import explode, read_xarray, block_slices 10 | 11 | 12 | def rand_wx(start: str, end: str) -> xr.Dataset: 13 | np.random.seed(42) 14 | lat = np.linspace(-90, 90, num=720) 15 | lon = np.linspace(-180, 180, num=1440) 16 | time = pd.date_range(start, end, freq='H') 17 | level = np.array([1000, 500], dtype=np.int32) 18 | reference_time = pd.Timestamp(start) 19 | temperature = 15 + 8 * np.random.randn(720, 1440, len(time), len(level)) 20 | precipitation = 10 * np.random.rand(720, 1440, len(time), len(level)) 21 | return xr.Dataset( 22 | data_vars=dict( 23 | temperature=(['lat', 'lon', 'time', 'level'], temperature), 24 | 
precipitation=(['lat', 'lon', 'time', 'level'], precipitation), 25 | ), 26 | coords=dict( 27 | lat=lat, 28 | lon=lon, 29 | time=time, 30 | level=level, 31 | reference_time=reference_time, 32 | ), 33 | attrs=dict(description='Random weather.'), 34 | ) 35 | 36 | 37 | class DaskTestCase(unittest.TestCase): 38 | 39 | def setUp(self) -> None: 40 | self.air = xr.tutorial.open_dataset('air_temperature') 41 | self.chunks = {'time': 240} 42 | self.air = self.air.chunk(self.chunks) 43 | 44 | self.air_small = self.air.isel( 45 | time=slice(0, 12), lat=slice(0, 11), lon=slice(0, 10) 46 | ).chunk(self.chunks) 47 | self.randwx = rand_wx('1995-01-13T00', '1995-01-13T01') 48 | 49 | 50 | class ExplodeTest(DaskTestCase): 51 | 52 | def test_cardinality(self): 53 | dss = explode(self.air) 54 | self.assertEqual( 55 | len(list(dss)), np.prod([len(c) for c in self.air.chunks.values()]) 56 | ) 57 | 58 | def test_dim_sizes__one(self): 59 | ds = next(iter(explode(self.air))) 60 | for k, v in self.chunks.items(): 61 | self.assertIn(k, ds.dims) 62 | self.assertEqual(v, ds.dims[k]) 63 | 64 | def skip_test_dim_sizes__all(self): 65 | # TODO(alxmrs): Why is this test slow? 
66 | dss = explode(self.air) 67 | self.assertEqual( 68 | [tuple(ds.dims.values()) for ds in dss], 69 | list(itertools.product(*self.air.chunksizes.values())), 70 | ) 71 | 72 | def test_data_equal__one__first(self): 73 | ds = next(iter(explode(self.air))) 74 | iselection = {dim: slice(0, s) for dim, s in ds.dims.items()} 75 | self.assertEqual(self.air.isel(iselection), ds) 76 | 77 | def test_data_equal__one__last(self): 78 | dss = list(explode(self.air)) 79 | ds = dss[-1] 80 | iselection = {dim: slice(0, s) for dim, s in ds.dims.items()} 81 | self.assertEqual(self.air.isel(iselection), ds) 82 | 83 | 84 | class DaskDataframeTest(DaskTestCase): 85 | 86 | def test_sanity(self): 87 | df = read_xarray(self.air_small).compute() 88 | self.assertIsNotNone(df) 89 | self.assertEqual(len(df), np.prod(list(self.air_small.dims.values()))) 90 | 91 | def test_columns(self): 92 | df = read_xarray(self.air_small).compute() 93 | cols = list(df.columns) 94 | self.assertEqual(cols, ['lat', 'time', 'lon', 'air']) 95 | 96 | def test_dtypes(self): 97 | df: dd.DataFrame = read_xarray(self.air_small).compute() 98 | types = list(df.dtypes) 99 | self.assertEqual([self.air_small[c].dtype for c in df.columns], types) 100 | 101 | def test_partitions_dont_match_dataset_chunks(self): 102 | standard_blocks = list(block_slices(self.air_small)) 103 | default: dd.DataFrame = read_xarray(self.air_small) 104 | chunked: dd.DataFrame = read_xarray(self.air_small, dict(time=5)) 105 | 106 | self.assertEqual(default.npartitions, len(standard_blocks)) 107 | self.assertNotEqual(chunked.npartitions, len(standard_blocks)) 108 | 109 | def test_chunk_perf(self): 110 | df = read_xarray(self.air, chunks=dict(time=6)).compute() 111 | self.assertIsNotNone(df) 112 | self.assertEqual(len(df), np.prod(list(self.air.dims.values()))) 113 | 114 | def test_column_metadata_preserved(self): 115 | try: 116 | _ = read_xarray(self.randwx, chunks=dict(time=24)).compute() 117 | except ValueError as e: 118 | if ( 119 | 'The columns 
in the computed data do not match the columns in the' 120 | ' provided metadata' in str(e) 121 | ): 122 | self.fail('Column metadata is incorrect.') 123 | 124 | 125 | if __name__ == '__main__': 126 | unittest.main() 127 | -------------------------------------------------------------------------------- /xarray_sql/sql.py: -------------------------------------------------------------------------------- 1 | import xarray as xr 2 | import dask_sql.input_utils 3 | 4 | from .df import read_xarray, Chunks 5 | 6 | 7 | class Context(dask_sql.Context): 8 | """See the `dask_sql.Context` docs.""" 9 | 10 | def create_table( 11 | self, 12 | table_name: str, 13 | input_table: dask_sql.input_utils.InputType, 14 | chunks: Chunks = None, 15 | *args, 16 | **kwargs, 17 | ): 18 | if isinstance(input_table, xr.Dataset): 19 | input_table = read_xarray(input_table, chunks) 20 | super().create_table(table_name, input_table, *args, **kwargs) 21 | -------------------------------------------------------------------------------- /xarray_sql/sql_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | from . 
import Context 5 | from .df_test import DaskTestCase 6 | 7 | 8 | class SqlTestCase(DaskTestCase): 9 | 10 | def test_sanity(self): 11 | c = Context() 12 | c.create_table('air', self.air_small) 13 | 14 | query = c.sql('SELECT "lat", "lon", "time", "air" FROM "air" LIMIT 100') 15 | 16 | result = query.compute() 17 | self.assertIsNotNone(result) 18 | self.assertEqual(len(result), 100) 19 | 20 | def test_agg_small(self): 21 | c = Context() 22 | c.create_table('air', self.air_small) 23 | 24 | query = c.sql( 25 | """ 26 | SELECT 27 | "lat", "lon", SUM("air") as air_total 28 | FROM 29 | "air" 30 | GROUP BY 31 | "lat", "lon" 32 | """ 33 | ) 34 | 35 | result = query.compute() 36 | self.assertIsNotNone(result) 37 | 38 | expected = self.air_small.dims['lat'] * self.air_small.dims['lon'] 39 | self.assertEqual(len(result), expected) 40 | 41 | def test_agg_regular(self): 42 | c = Context() 43 | c.create_table('air', self.air) 44 | 45 | query = c.sql( 46 | """ 47 | SELECT 48 | "lat", "lon", AVG("air") as air_total 49 | FROM 50 | "air" 51 | GROUP BY 52 | "lat", "lon" 53 | """ 54 | ) 55 | 56 | result = query.compute() 57 | self.assertIsNotNone(result) 58 | 59 | expected = self.air.dims['lat'] * self.air.dims['lon'] 60 | self.assertEqual(len(result), expected) 61 | 62 | 63 | if __name__ == '__main__': 64 | unittest.main() 65 | --------------------------------------------------------------------------------