├── .coveragerc ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── release.yml │ └── tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── ci └── envs │ ├── 310-minimal.yaml │ ├── 310-no-optional-deps.yaml │ ├── 311-latest-no-expr.yaml │ ├── 311-latest.yaml │ ├── 312-dev.yaml │ └── 312-latest.yaml ├── dask_geopandas ├── __init__.py ├── _expr.py ├── _version.py ├── backends.py ├── clip.py ├── core.py ├── expr.py ├── geohash.py ├── hilbert_distance.py ├── io │ ├── __init__.py │ ├── arrow.py │ ├── file.py │ └── parquet.py ├── morton_distance.py ├── sjoin.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ ├── README.md │ ├── naturalearth_cities │ │ ├── naturalearth_cities.VERSION.txt │ │ ├── naturalearth_cities.cpg │ │ ├── naturalearth_cities.dbf │ │ ├── naturalearth_cities.prj │ │ ├── naturalearth_cities.shp │ │ └── naturalearth_cities.shx │ └── naturalearth_lowres │ │ ├── naturalearth_lowres.cpg │ │ ├── naturalearth_lowres.dbf │ │ ├── naturalearth_lowres.prj │ │ ├── naturalearth_lowres.shp │ │ └── naturalearth_lowres.shx │ ├── io │ ├── __init__.py │ ├── conftest.py │ ├── test_arrow.py │ ├── test_backend_integration.py │ ├── test_file.py │ └── test_parquet.py │ ├── test_clip.py │ ├── test_core.py │ ├── test_distributed.py │ ├── test_geohash.py │ ├── test_hilbert_distance.py │ ├── test_morton_distance.py │ ├── test_sjoin.py │ └── test_spatial_partitioning.py ├── doc ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── binary_geo-difference.svg │ ├── binary_geo-intersection.svg │ ├── binary_geo-symm_diff.svg │ ├── binary_geo-union.svg │ ├── binary_op-01.svg │ ├── binary_op-02.svg │ ├── binary_op-03.svg │ └── custom.css │ ├── api.rst │ ├── changelog.rst │ ├── conf.py │ ├── docs │ └── reference │ │ ├── geodataframe.rst │ │ ├── geoseries.rst │ │ ├── io.rst │ │ └── tools.rst │ ├── getting_started.md │ ├── guide.md │ ├── guide │ ├── basic-intro.ipynb │ ├── dissolve.ipynb │ └── spatial-partitioning.ipynb │ ├── index.md │ ├── installation.md │ └── parquet.md ├── pyproject.toml ├── readthedocs.yml ├── requirements-dev.txt ├── setup.cfg ├── setup.py └── versioneer.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | dask_geopandas/tests/*.py 4 | */_version.py 5 | source = 6 | dask_geopandas 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | dask_geopandas/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | # Check for updates to GitHub Actions every week 8 | interval: "weekly" 9 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish dask-geopandas to PyPI / GitHub 2 | 3 | on: 4 | push: 5 | branches: 6 | - main # just build the sdist & wheel, skip release 7 | tags: 8 | - "v*" 9 | pull_request: # also build on PRs touching this file 10 | paths: 11 | - ".github/workflows/release.yml" 12 | - 
"MANIFEST.in" 13 | - "pyproject.toml" 14 | - "setup.py" 15 | 16 | jobs: 17 | build: 18 | name: Build dask-geopandas 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - name: Checkout source 23 | uses: actions/checkout@v4 24 | with: 25 | fetch-depth: 0 26 | 27 | - name: Set up Python 28 | uses: actions/setup-python@v5 29 | with: 30 | python-version: "3.x" 31 | 32 | - name: Build a binary wheel and a source tarball 33 | run: | 34 | python -m pip install --upgrade pip build 35 | python -m build 36 | 37 | - uses: actions/upload-artifact@v4 38 | with: 39 | path: ./dist/* 40 | retention-days: 5 41 | 42 | publish: 43 | name: Publish dask-geopandas to PyPI 44 | needs: [build] 45 | runs-on: ubuntu-latest 46 | # release on every tag 47 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') 48 | 49 | steps: 50 | - uses: actions/download-artifact@v4 51 | with: 52 | name: artifact 53 | path: dist 54 | 55 | - name: Publish distribution to PyPI 56 | uses: pypa/gh-action-pypi-publish@release/v1 57 | with: 58 | user: __token__ 59 | password: ${{ secrets.PYPI_API_TOKEN }} 60 | 61 | - name: Create GitHub Release 62 | id: create_release 63 | uses: actions/create-release@v1 64 | env: 65 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token 66 | with: 67 | tag_name: ${{ github.ref }} 68 | release_name: ${{ github.ref }} 69 | draft: false 70 | prerelease: false 71 | 72 | - name: Get Asset name 73 | run: | 74 | export PKG=$(ls dist/ | grep tar) 75 | set -- $PKG 76 | echo "name=$1" >> $GITHUB_ENV 77 | 78 | - name: Upload Release Asset (sdist) to GitHub 79 | id: upload-release-asset 80 | uses: actions/upload-release-asset@v1 81 | env: 82 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 83 | with: 84 | upload_url: ${{ steps.create_release.outputs.upload_url }} 85 | asset_path: dist/${{ env.name }} 86 | asset_name: ${{ env.name }} 87 | asset_content_type: application/zip 88 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | - cron: "0 0 * * *" 10 | 11 | jobs: 12 | Linting: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: actions/setup-python@v5 18 | - uses: pre-commit/action@v3.0.1 19 | 20 | Test: 21 | needs: Linting 22 | name: ${{ matrix.os }}, ${{ matrix.env }} 23 | runs-on: ${{ matrix.os }} 24 | defaults: 25 | run: 26 | shell: bash -l {0} 27 | continue-on-error: true 28 | strategy: 29 | matrix: 30 | os: [ubuntu-latest] 31 | env: 32 | - ci/envs/310-minimal.yaml 33 | - ci/envs/310-no-optional-deps.yaml 34 | - ci/envs/311-latest.yaml 35 | - ci/envs/312-latest.yaml 36 | 37 | include: 38 | - env: ci/envs/311-latest.yaml 39 | os: macos-latest 40 | - env: ci/envs/311-latest.yaml 41 | os: windows-latest 42 | - env: ci/envs/312-dev.yaml 43 | os: ubuntu-latest 44 | 45 | steps: 46 | - uses: actions/checkout@v4 47 | 48 | - name: Setup Conda 49 | uses: conda-incubator/setup-miniconda@v3 50 | with: 51 | environment-file: ${{ matrix.env }} 52 | miniforge-version: latest 53 | miniforge-variant: Miniforge3 54 | use-mamba: true 55 | 56 | - name: Check and Log Environment 57 | run: | 58 | python -V 59 | python -c "import geopandas; geopandas.show_versions();" 60 | conda info 61 | conda list 62 | 63 | - name: Test 64 | run: | 65 | pytest -v -r a 
--color=yes --cov=dask_geopandas --cov-append --cov-report term-missing --cov-report xml . 66 | 67 | - uses: codecov/codecov-action@v5 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .hypothesis 2 | *.py[cod] 3 | __pycache__/ 4 | *.egg-info 5 | dask-worker-space/ 6 | docs/build 7 | build/ 8 | dist/ 9 | .idea/ 10 | log.* 11 | log 12 | .pytest_cache/ 13 | .coverage 14 | .DS_Store 15 | *.swp 16 | *.swo 17 | .cache/ 18 | .ipynb_checkpoints 19 | .vscode/ 20 | 21 | coverage.xml 22 | 23 | doc/source/docs/reference/api -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_prs: false 3 | autoupdate_schedule: quarterly 4 | 5 | files: 'dask_geopandas\/' 6 | repos: 7 | - repo: https://github.com/psf/black 8 | rev: 24.2.0 9 | hooks: 10 | - id: black 11 | language_version: python3 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | rev: "v0.4.4" 14 | hooks: 15 | - id: ruff 16 | name: sort imports with ruff 17 | args: [--select, I, --fix] 18 | - id: ruff 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Version 0.5.0 (upcoming) 5 | ------------------------ 6 | 7 | Deprecations and compatibility notes: 8 | 9 | - The deprecated `geom_almost_equals` method has been removed. Use `geom_equals_exact` instead. 10 | 11 | 12 | Version 0.4.3 (January, 2025) 13 | ----------------------------- 14 | 15 | Packaging: 16 | 17 | - `dask>=2025.1.0` is now required. 18 | - `python>=3.10` is now required. 19 | 20 | Bug fixes: 21 | 22 | - Fixed `GeoDataFrame.drop` returning a `GeoDataFrame` 23 | instead of a `DataFrame`, when dropping the geometry 24 | column (#321). 25 | 26 | Version 0.4.2 (September 24, 2024) 27 | ---------------------------------- 28 | 29 | Bug fixes: 30 | 31 | - Ensure `read_file()` produces a correct empty meta object, avoiding later 32 | errors in `spatial_shuffle()` (#302). 33 | - Fix in `sjoin()` to work with GeoDataFrames after a `spatial_shuffle()` (#303). 34 | 35 | Packaging: 36 | 37 | - `distributed` was dropped as a required dependency, only depending on 38 | `dask[dataframe]` (#258). 39 | 40 | 41 | Version 0.4.1 (June 25, 2024) 42 | ----------------------------- 43 | 44 | Bug fixes: 45 | 46 | - Allow to run dask-geopandas with recent dask versions without using query 47 | planning (without dask-expr being installed). 48 | 49 | Packaging: 50 | 51 | - The `dask` dependency was updated to `dask[dataframe]` in pyproject.toml (when 52 | installing from source or binary wheels from PyPI). This ensures dask-expr 53 | gets installed automatically for recent versions of dask. 54 | 55 | Version 0.4.0 (June 24, 2024) 56 | ----------------------------- 57 | 58 | Enhancements: 59 | 60 | - Added preliminary support for dask's new query planning (dask >= 2024.3.0) (#285). 61 | - Added support for using dask-geopandas with distributed's P2P shuffle (this 62 | requires the latest distributed>=2024.6.0 to work) (#295). 63 | - Added new `from_wkb()` and `from_wkt()` functions to convert a dask Series of 64 | WKB or WKT values into a dask-geopandas GeoSeries (#293). 
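  For example (illustrative; the ``crs`` keyword is assumed to mirror the geopandas equivalent):

  ```python
  import dask.dataframe as dd
  import pandas as pd
  import dask_geopandas

  # a dask Series of WKT strings (hypothetical data)
  wkt = dd.from_pandas(pd.Series(["POINT (0 0)", "POINT (1 1)"]), npartitions=2)
  geoms = dask_geopandas.from_wkt(wkt, crs="EPSG:4326")  # dask-geopandas GeoSeries
  ```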
65 | 66 | Notes on dependencies: 67 | 68 | - Removed support for PyGEOS, now requiring Shapely >= 2 (#280). 69 | - Updated minimum supported versions of dependencies, now requiring Python 3.9, 70 | GeoPandas 0.12, numpy 1.23 and dask/distributed 2022.06.0. 71 | 72 | Version 0.3.1 (April 28, 2023) 73 | ------------------------------ 74 | 75 | Bug fixes: 76 | 77 | - Compatibility with dask >= 2023.4 and changes regarding ``use_nullable_dtypes`` 78 | keyword (#242). 79 | - Ensure ``spatial_partitions`` are preserved when serialized deserialized 80 | with pickle (#237). 81 | 82 | Version 0.3.0 (January 23, 2023) 83 | -------------------------------- 84 | 85 | Enhancements: 86 | 87 | - Dask-GeoPandas is now compatible with Shapely 2.0 (and if this version is 88 | installed, no longer requires PyGEOS) 89 | 90 | Bug fixes: 91 | 92 | - Compatibility with dask >= 2022.12 for ``read_parquet()`` (#230) and for 93 | ``dissolve()`` (#229) 94 | - Fix the ``spatial_partitions`` of the result of ``sjoin()`` (#216) 95 | 96 | Version 0.2.0 (July 1, 2022) 97 | ---------------------------- 98 | 99 | Enhancements: 100 | 101 | - Optionally skip spatial bounds in ``read_parquet`` (#203) 102 | 103 | Bug fixes: 104 | 105 | - Don't put ``GeoSeries`` in ``map_partitions`` kwarg (#205) 106 | 107 | Version 0.1.3 (June 21, 2021) 108 | ----------------------------- 109 | 110 | Compatibility: 111 | 112 | - MAINT: use ``predicate`` instead of ``op`` in ``sjoin`` (#204) 113 | 114 | Version 0.1.2 (June 20, 2021) 115 | ----------------------------- 116 | 117 | Bug fixes: 118 | 119 | - Update ``to_parquet`` to handle custom schema (to fix writing partitions with all missing data) (#201) 120 | 121 | Version 0.1.1 (June 19, 2021) 122 | ----------------------------- 123 | 124 | Bug fixes: 125 | 126 | - Compat with dask 2022.06.0: fix schema inference in ``to_parquet`` (#199) 127 | - Remove custom ``__dask_postcompute__`` (#191) 128 | - BUG: persist ``spatial_partitions`` information in ``persist()`` (#192) 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Dask-geopandas is a community maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more. 2 | 3 | Also for general information on how to contribute to GeoPandas projects see https://geopandas.org/en/latest/community/contributing.html. 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, Dask Developers 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include dask_geopandas *.py 2 | recursive-include dask_geopandas *.yaml 3 | 4 | include versioneer.py 5 | include setup.py 6 | include README.rst 7 | include LICENSE 8 | include dask_geopandas/_version.py 9 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | dask-geopandas |conda| |pypi| |docs| |gitter| 2 | ============================================= 3 | 4 | Parallel GeoPandas with Dask 5 | 6 | Dask-GeoPandas is a project merging the geospatial capabilities of GeoPandas 7 | and scalability of Dask. GeoPandas is an open source project designed to make working with geospatial data in Python easier. GeoPandas extends the datatypes used by pandas to allow spatial operations on geometric types. 8 | Dask provides advanced parallelism and distributed out-of-core computation with a dask.dataframe module designed to scale 9 | pandas. Since GeoPandas is an extension to the pandas DataFrame, the same way Dask scales pandas can also be applied to GeoPandas. 10 | 11 | This project is a bridge between Dask and GeoPandas and offers geospatial capabilities of GeoPandas backed by Dask. 12 | 13 | Documentation 14 | ------------- 15 | 16 | See the documentation on https://dask-geopandas.readthedocs.io/en/latest/ 17 | 18 | Installation 19 | ------------ 20 | 21 | This package depends on Shapely, GeoPandas and Dask. 22 | 23 | One way to install all required dependencies is to use the ``conda`` package manager to 24 | create a new environment: 25 | 26 | :: 27 | 28 | conda create -n geo_env 29 | conda activate geo_env 30 | conda config --env --add channels conda-forge 31 | conda config --env --set channel_priority strict 32 | conda install dask-geopandas 33 | 34 | 35 | 36 | Example 37 | ------- 38 | 39 | Given a GeoPandas dataframe 40 | 41 | .. code-block:: python 42 | 43 | import geopandas 44 | df = geopandas.read_file('...') 45 | 46 | We can repartition it into a Dask-GeoPandas dataframe: 47 | 48 | .. code-block:: python 49 | 50 | import dask_geopandas 51 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 52 | 53 | The familiar spatial attributes and methods of GeoPandas are also available 54 | and will be computed in parallel: 55 | 56 | .. code-block:: python 57 | 58 | ddf.geometry.area.compute() 59 | ddf.within(polygon) 60 | 61 | 62 | .. |pypi| image:: https://img.shields.io/pypi/v/dask-geopandas.svg 63 | :target: https://pypi.python.org/pypi/dask-geopandas/ 64 | 65 | .. 
|conda| image:: https://img.shields.io/conda/vn/conda-forge/dask-geopandas.svg 66 | :target: https://anaconda.org/conda-forge/dask-geopandas 67 | :alt: Conda Version 68 | 69 | .. |docs| image:: https://readthedocs.org/projects/dask-geopandas/badge/?version=latest 70 | :target: https://dask-geopandas.readthedocs.io/en/latest/?badge=latest 71 | :alt: Documentation Status 72 | 73 | .. |gitter| image:: https://badges.gitter.im/geopandas/geopandas.svg 74 | :target: https://gitter.im/geopandas/geopandas 75 | :alt: Gitter 76 | -------------------------------------------------------------------------------- /ci/envs/310-minimal.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # required dependencies 6 | - python=3.10 7 | - numpy=1.24 8 | - dask=2025.1.0 9 | - distributed=2025.1.0 10 | - geopandas=0.14.3 11 | - pandas=2.0.0 12 | - shapely=2.0 13 | - pyproj=3.4 14 | - packaging 15 | # test dependencies 16 | - pytest 17 | - pytest-cov 18 | - hilbertcurve 19 | - pygeohash 20 | # optional dependencies 21 | - pyarrow 22 | - pip 23 | - pip: 24 | - pymorton 25 | -------------------------------------------------------------------------------- /ci/envs/310-no-optional-deps.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # required dependencies 6 | - python=3.10 7 | - dask 8 | - geopandas 9 | - pyproj 10 | - packaging 11 | # test dependencies 12 | - pytest 13 | - pytest-cov 14 | -------------------------------------------------------------------------------- /ci/envs/311-latest-no-expr.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # required dependencies 6 | - python=3.11 7 | - dask-core 8 | - geopandas 9 | - pyproj=3.4 10 | - packaging 11 | # test dependencies 12 | - pytest 13 | - pytest-cov 14 | - hilbertcurve 15 | - s3fs 16 | - moto<5 # <5 pin because of https://github.com/dask/dask/issues/10869 17 | - flask # needed for moto server 18 | # optional dependencies 19 | - pyarrow 20 | - pyogrio>=0.4 21 | - pygeohash 22 | - pip 23 | - pip: 24 | - pymorton 25 | -------------------------------------------------------------------------------- /ci/envs/311-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # required dependencies 6 | - python=3.11 7 | - dask 8 | - distributed 9 | - geopandas 10 | - pyproj=3.4 11 | - packaging 12 | # test dependencies 13 | - pytest 14 | - pytest-cov 15 | - hilbertcurve 16 | - s3fs 17 | - moto<5 # <5 pin because of https://github.com/dask/dask/issues/10869 18 | - flask # needed for moto server 19 | # optional dependencies 20 | - pyarrow 21 | - pyogrio>=0.4 22 | - pygeohash 23 | - pip 24 | - pip: 25 | - pymorton 26 | -------------------------------------------------------------------------------- /ci/envs/312-dev.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # required dependencies 6 | - python=3.12 7 | - distributed 8 | - pandas 9 | - geos # for shapely main 10 | - fiona 11 | - pyproj 12 | - fsspec 13 | - packaging 14 | # test dependencies 15 | - pytest 16 | - pytest-cov 17 | - hilbertcurve 18 | - s3fs 19 | - moto<5 # <5 pin because of 
https://github.com/dask/dask/issues/10869 20 | - flask # needed for moto server 21 | # optional dependencies 22 | - pyarrow 23 | - pyogrio 24 | - pygeohash 25 | - pip 26 | - pip: 27 | - pymorton 28 | - git+https://github.com/shapely/shapely.git@main 29 | - git+https://github.com/geopandas/geopandas.git@main 30 | - git+https://github.com/dask/dask.git@main 31 | -------------------------------------------------------------------------------- /ci/envs/312-latest.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # required dependencies 6 | - python=3.12 7 | - dask 8 | - distributed 9 | - geopandas 10 | - shapely >= 2 11 | - pyproj 12 | - packaging 13 | # test dependencies 14 | - pytest 15 | - pytest-cov 16 | - hilbertcurve 17 | - s3fs 18 | - moto<5 # <5 pin because of https://github.com/dask/dask/issues/10869 19 | - flask # needed for moto server 20 | # optional dependencies 21 | - pyarrow 22 | - pyogrio 23 | - pygeohash 24 | - pip 25 | - pip: 26 | - pymorton 27 | -------------------------------------------------------------------------------- /dask_geopandas/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import get_versions 2 | 3 | from .expr import ( 4 | points_from_xy, 5 | from_wkt, 6 | from_wkb, 7 | GeoDataFrame, 8 | GeoSeries, 9 | from_geopandas, 10 | from_dask_dataframe, 11 | ) 12 | from .io.file import read_file 13 | from .io.parquet import read_parquet, to_parquet 14 | from .io.arrow import read_feather, to_feather 15 | from .clip import clip 16 | from .sjoin import sjoin 17 | from . import backends as _ # needed to register dispatch functions with dask 18 | 19 | 20 | __version__ = get_versions()["version"] 21 | del get_versions 22 | 23 | __all__ = [ 24 | "GeoDataFrame", 25 | "GeoSeries", 26 | "clip", 27 | "from_dask_dataframe", 28 | "from_geopandas", 29 | "from_wkb", 30 | "from_wkt", 31 | "points_from_xy", 32 | "read_feather", 33 | "read_file", 34 | "read_parquet", 35 | "sjoin", 36 | "to_feather", 37 | "to_parquet", 38 | ] 39 | 40 | from . 
import _version 41 | 42 | __version__ = _version.get_versions()["version"] 43 | -------------------------------------------------------------------------------- /dask_geopandas/_expr.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | import dask.dataframe.dask_expr as dx 4 | 5 | import geopandas 6 | 7 | 8 | def _drop(df: geopandas.GeoDataFrame, columns, errors): 9 | return df.drop(columns=columns, errors=errors) 10 | 11 | 12 | def _validate_axis(axis=0, none_is_zero: bool = True) -> None | Literal[0, 1]: 13 | if axis not in (0, 1, "index", "columns", None): 14 | raise ValueError(f"No axis named {axis}") 15 | # convert to numeric axis 16 | numeric_axis: dict[str | None, Literal[0, 1]] = {"index": 0, "columns": 1} 17 | if none_is_zero: 18 | numeric_axis[None] = 0 19 | 20 | return numeric_axis.get(axis, axis) 21 | 22 | 23 | class Drop(dx.expr.Drop): 24 | operation = staticmethod(_drop) 25 | -------------------------------------------------------------------------------- /dask_geopandas/backends.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from packaging.version import Version 3 | 4 | import pandas as pd 5 | 6 | import dask 7 | from dask.base import normalize_token 8 | from dask.dataframe.backends import _nonempty_index, meta_nonempty_dataframe 9 | from dask.dataframe.core import get_parallel_type 10 | from dask.dataframe.dispatch import make_meta_dispatch, pyarrow_schema_dispatch 11 | from dask.dataframe.extensions import make_array_nonempty, make_scalar 12 | from dask.dataframe.utils import meta_nonempty 13 | 14 | import geopandas 15 | import shapely.geometry 16 | from geopandas.array import GeometryArray, GeometryDtype, from_shapely 17 | from shapely.geometry.base import BaseGeometry 18 | 19 | from .expr import GeoDataFrame, GeoSeries 20 | 21 | get_parallel_type.register(geopandas.GeoDataFrame, lambda _: GeoDataFrame) 22 | get_parallel_type.register(geopandas.GeoSeries, lambda _: GeoSeries) 23 | 24 | 25 | @make_meta_dispatch.register(BaseGeometry) 26 | def make_meta_shapely_geometry(x, index=None): 27 | return x 28 | 29 | 30 | @make_array_nonempty.register(GeometryDtype) 31 | def _(dtype): 32 | return from_shapely( 33 | [shapely.geometry.LineString([(i, i), (i, i + 1)]) for i in range(2)] 34 | ) 35 | 36 | 37 | @make_scalar.register(GeometryDtype.type) 38 | def _(x): 39 | return shapely.geometry.Point(0, 0) 40 | 41 | 42 | @meta_nonempty.register(geopandas.GeoSeries) 43 | def _nonempty_geoseries(x, idx=None): 44 | if idx is None: 45 | idx = _nonempty_index(x.index) 46 | data = make_array_nonempty(x.dtype) 47 | return geopandas.GeoSeries(data, name=x.name, crs=x.crs) 48 | 49 | 50 | @meta_nonempty.register(geopandas.GeoDataFrame) 51 | def _nonempty_geodataframe(x): 52 | df = meta_nonempty_dataframe(x) 53 | return geopandas.GeoDataFrame(df, geometry=x._geometry_column_name, crs=x.crs) 54 | 55 | 56 | @make_meta_dispatch.register((geopandas.GeoSeries, geopandas.GeoDataFrame)) 57 | def make_meta_geodataframe(df, index=None): 58 | return df.head(0) 59 | 60 | 61 | @normalize_token.register(GeometryArray) 62 | def tokenize_geometryarray(x): 63 | # TODO if we can find an efficient hashing function (eg hashing integer 64 | # pointers on the C level?), we could replace this random uuid 65 | return uuid.uuid4().hex 66 | 67 | 68 | @pyarrow_schema_dispatch.register((geopandas.GeoDataFrame,)) 69 | def get_pyarrow_schema_geopandas(obj): 70 | import pyarrow as pa 71 | 72 | df = 
pd.DataFrame(obj.copy()) 73 | for col in obj.columns[obj.dtypes == "geometry"]: 74 | df[col] = obj[col].to_wkb() 75 | return pa.Schema.from_pandas(df) 76 | 77 | 78 | if Version(dask.__version__) >= Version("2023.6.1"): 79 | from dask.dataframe.dispatch import ( 80 | from_pyarrow_table_dispatch, 81 | to_pyarrow_table_dispatch, 82 | ) 83 | 84 | @to_pyarrow_table_dispatch.register((geopandas.GeoDataFrame,)) 85 | def get_pyarrow_table_from_geopandas(obj, **kwargs): 86 | # `kwargs` must be supported by `pyarrow.Table.from_pandas` 87 | import pyarrow as pa 88 | 89 | if Version(geopandas.__version__).major < 1: 90 | return pa.Table.from_pandas(obj.to_wkb(), **kwargs) 91 | else: 92 | # TODO handle kwargs? 93 | return pa.table(obj.to_arrow()) 94 | 95 | @from_pyarrow_table_dispatch.register((geopandas.GeoDataFrame,)) 96 | def get_geopandas_geodataframe_from_pyarrow(meta, table, **kwargs): 97 | # `kwargs` must be supported by `pyarrow.Table.to_pandas` 98 | if Version(geopandas.__version__).major < 1: 99 | df = table.to_pandas(**kwargs) 100 | 101 | for col in meta.columns[meta.dtypes == "geometry"]: 102 | df[col] = geopandas.GeoSeries.from_wkb(df[col], crs=meta[col].crs) 103 | 104 | return df 105 | 106 | else: 107 | # TODO handle kwargs? 108 | return geopandas.GeoDataFrame.from_arrow(table) 109 | -------------------------------------------------------------------------------- /dask_geopandas/clip.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from dask.base import tokenize 4 | from dask.dataframe import from_graph 5 | from dask.highlevelgraph import HighLevelGraph 6 | from dask.utils import derived_from 7 | 8 | import geopandas 9 | 10 | 11 | @derived_from(geopandas.tools) 12 | def clip(gdf, mask, keep_geom_type=False): 13 | from dask_geopandas import GeoDataFrame, GeoSeries 14 | 15 | if isinstance(mask, (GeoDataFrame, GeoSeries)): 16 | raise NotImplementedError("Mask cannot be a Dask GeoDataFrame or GeoSeries.") 17 | 18 | if gdf.spatial_partitions is None: 19 | return gdf.map_partitions( 20 | lambda partition: geopandas.clip( 21 | gdf=partition, mask=mask, keep_geom_type=keep_geom_type 22 | ), 23 | token="clip", 24 | meta=gdf._meta, 25 | ) 26 | 27 | new_spatial_partitions = geopandas.clip( 28 | gdf=gdf.spatial_partitions, 29 | mask=mask, 30 | # keep_geom_type is always false for clipping the spatial partitions 31 | # otherwise we'd be falsely creating new partition(s) 32 | keep_geom_type=False, 33 | ) 34 | intersecting_partitions = np.asarray(new_spatial_partitions.index) 35 | 36 | name = f"clip-{tokenize(gdf, mask, keep_geom_type)}" 37 | dsk = { 38 | (name, i): (geopandas.clip, (gdf._name, part), mask, keep_geom_type) 39 | for i, part in enumerate(intersecting_partitions) 40 | } 41 | divisions = [None] * (len(dsk) + 1) 42 | graph = HighLevelGraph.from_collections(name, dsk, dependencies=[gdf]) 43 | 44 | result = from_graph(graph, gdf._meta, tuple(divisions), dsk.keys(), "clip") 45 | 46 | result.spatial_partitions = new_spatial_partitions 47 | return result 48 | -------------------------------------------------------------------------------- /dask_geopandas/core.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from .expr import * # noqa: F403 4 | 5 | warnings.warn( 6 | "dask_geopandas.core is deprecated and will be removed in a future version.", 7 | category=FutureWarning, 8 | stacklevel=1, 9 | ) 10 | 
-------------------------------------------------------------------------------- /dask_geopandas/geohash.py: -------------------------------------------------------------------------------- 1 | """ 2 | Geohash implementation 3 | 4 | The code is originally based on the neathgeohash package, 5 | Copyright (c) 2020 Marek Dwulit, MIT License 6 | (https://pypi.org/project/neathgeohash/#description). 7 | The vectorized implementation for quantization and bit interleaving is in turn based on, 8 | "Geohash in Golang Assembly" blog (https://mmcloughlin.com/posts/geohash-assembly). 9 | 10 | """ 11 | 12 | import warnings 13 | 14 | import numpy as np 15 | import pandas as pd 16 | 17 | 18 | def _geohash(gdf, as_string, precision): 19 | """ 20 | Calculate geohash based on the middle points of the geometry bounds 21 | for a given precision 22 | 23 | Parameters 24 | ---------- 25 | gdf : GeoDataFrame 26 | as_string : bool 27 | to return string or int Geohash 28 | precision : int 29 | precision of the string Geohash 30 | 31 | 32 | Returns 33 | --------- 34 | type : pandas.Series 35 | Series containing geohash 36 | """ 37 | with warnings.catch_warnings(): 38 | warnings.filterwarnings( 39 | "ignore", "GeoSeries.isna() previously returned True", UserWarning 40 | ) 41 | if gdf.is_empty.any() | gdf.geometry.isna().any(): 42 | raise ValueError( 43 | "Geohash cannot be computed on a GeoSeries with empty or " 44 | "missing geometries.", 45 | ) 46 | 47 | # Calculate bounds 48 | bounds = gdf.bounds.to_numpy() 49 | # Calculate mid points based on bounds 50 | x_mids, y_mids = _calculate_mid_points(bounds) 51 | # Create pairs of x and y midpoints 52 | coords = np.array([y_mids, x_mids]).T 53 | # Encode coords with Geohash 54 | geohash = encode_geohash(coords, as_string, precision) 55 | 56 | return pd.Series(geohash, index=gdf.index, name="geohash") 57 | 58 | 59 | def _calculate_mid_points(bounds): 60 | """ 61 | Calculate middle points based on the geometry bounds 62 | 63 | Parameters 64 | ---------- 65 | bounds : array_like 66 | array containing xmin, ymin, xmax, ymax 67 | 68 | Returns 69 | --------- 70 | x_mids : mid points of x values 71 | y_mids : mid points of y values 72 | """ 73 | 74 | # Calculate mid points for x and y bound coords 75 | x_mids = (bounds[:, 0] + bounds[:, 2]) / 2.0 76 | y_mids = (bounds[:, 1] + bounds[:, 3]) / 2.0 77 | 78 | return x_mids, y_mids 79 | 80 | 81 | def encode_geohash(coords, as_string, precision): 82 | """ 83 | Calculate geohash based on coordinates for a 84 | given precision 85 | 86 | Parameters 87 | ---------- 88 | coords : array_like of shape (n, 2) 89 | array of [x, y] pairs 90 | as_string : bool 91 | to return string or int Geohash 92 | precision : int 93 | precision of the string Geohash 94 | Returns 95 | --------- 96 | geohash: array containing either int or string 97 | geohashes for each mid point 98 | """ 99 | 100 | quantized_coords = _quantize_points(coords) 101 | int_geohash = _encode_into_uint64(quantized_coords) 102 | 103 | if not as_string: 104 | return int_geohash 105 | 106 | gs_uint8_mat = _encode_base32(int_geohash) 107 | str_geohash = _encode_unicode(gs_uint8_mat, precision) 108 | 109 | return str_geohash 110 | 111 | 112 | def _quantize_points(coords): 113 | """ 114 | Quantize coordinates by mapping onto 115 | unit intervals [0, 1] and multiplying by 2^32. 
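    For example (illustrative): a point at latitude 0, longitude 0 is first
    shifted to (90, 180) and then scaled, giving (2**31, 2**31) before the
    floor is taken.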
116 | 117 | Parameters 118 | ---------- 119 | coords : array_like of shape (n, 2) 120 | array of [x, y] pairs 121 | coordinate pairs 122 | 123 | Returns 124 | --------- 125 | array_like of shape (n, 2) 126 | """ 127 | 128 | _q = np.array([(2.0**32 / 180, 0), (0, 2.0**32 / (180 * 2))], dtype="float64") 129 | 130 | quantized_coords = coords + np.array([90, 180]) 131 | quantized_coords = np.dot(quantized_coords, _q) 132 | quantized_coords = np.floor(quantized_coords) 133 | 134 | return quantized_coords 135 | 136 | 137 | def _encode_into_uint64(quantized_coords): 138 | """ 139 | 140 | Encode quantized coordinates into uint64 141 | using both spreading and interleaving bits 142 | 143 | Implementation based on "Geohash in Golang Assembly" 144 | blog (https://mmcloughlin.com/posts/geohash-assembly) 145 | 146 | Parameters 147 | ---------- 148 | quantized_coords : array_like of shape (n, 2) 149 | array of quantized coordinate pairs 150 | 151 | Returns 152 | --------- 153 | array_like of shape (n, 2) 154 | coordinate pairs encoded to uint64 values 155 | quantized coordinate pairs 156 | """ 157 | 158 | # spread out 32 bits of x into 64 bits, where the bits occupy even bit positions. 159 | x = quantized_coords.astype(np.uint64) 160 | x = x.reshape(-1, 2) 161 | x = (x | (x << 16)) & 0x0000FFFF0000FFFF 162 | x = (x | (x << 8)) & 0x00FF00FF00FF00FF 163 | x = (x | (x << 4)) & 0x0F0F0F0F0F0F0F0F 164 | x = (x | (x << 2)) & 0x3333333333333333 165 | x = (x | (x << 1)) & 0x5555555555555555 166 | 167 | # Dot 168 | __s1 = np.array([(1, 0), (0, 2)], dtype=np.uint64) 169 | x = x @ __s1 170 | # Interleave x and y bits so that x and y occupy even and odd bit levels 171 | x = x[:, 0] | x[:, 1] 172 | x = x >> 4 173 | 174 | return x 175 | 176 | 177 | def _encode_base32(encoded_uint64): 178 | """ 179 | Encode quantized coordinates into base32 pairs. 180 | Encoding starts at the highest bit, consuming 5 bits for each character precision. 181 | This means encoding happens 12 times for the 12 character precision or 60 bits. 
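    For example (illustrative): a 5-bit group equal to 10 maps to the
    character "b" and a group equal to 31 maps to "z" in the geohash base32
    alphabet "0123456789bcdefghjkmnpqrstuvwxyz" (the mapping itself is
    applied by the ``replacement`` table in ``_encode_unicode``).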
182 | 183 | Implementation is based on "Geohash in Golang Assembly" 184 | blog (https://mmcloughlin.com/posts/geohash-assembly) 185 | 186 | Parameters 187 | ---------- 188 | g_uint64 : array_like 189 | coordinate pairs encoded to uint64 values 190 | 191 | Returns 192 | --------- 193 | array_like of shape (n, 12) 194 | with base 32 values as 8-bit unasigned integer 195 | """ 196 | # Define 32 bit mask 197 | mask = np.uint64(0x1F).flatten() # equivalent to 32-1 198 | # Return array for each character 199 | c11 = (encoded_uint64 >> 0) & mask 200 | c10 = (encoded_uint64 >> 5) & mask 201 | c9 = (encoded_uint64 >> 10) & mask 202 | c8 = (encoded_uint64 >> 15) & mask 203 | c7 = (encoded_uint64 >> 20) & mask 204 | c6 = (encoded_uint64 >> 25) & mask 205 | c5 = (encoded_uint64 >> 30) & mask 206 | c4 = (encoded_uint64 >> 35) & mask 207 | c3 = (encoded_uint64 >> 40) & mask 208 | c2 = (encoded_uint64 >> 45) & mask 209 | c1 = (encoded_uint64 >> 50) & mask 210 | c0 = (encoded_uint64 >> 55) & mask 211 | 212 | # Stack each array vertically 213 | return np.column_stack((c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11)).astype( 214 | "uint8" 215 | ) 216 | 217 | 218 | def _encode_unicode(encoded_base32, precision): 219 | """ 220 | Encode base32 pairs into geohash bytes with an option to return 221 | the geohash in unicode format 222 | 223 | Parameters 224 | ---------- 225 | encoded_base32 : array_like 226 | coordinate pairs 227 | p : int 228 | precision of the Geohash 229 | 230 | Returns 231 | --------- 232 | array_like of shape (n, precision) 233 | containing geohash for a given precision 234 | """ 235 | 236 | # Define replacement values 237 | replacement = np.array( 238 | [ 239 | 48, 240 | 49, 241 | 50, 242 | 51, 243 | 52, 244 | 53, 245 | 54, 246 | 55, 247 | 56, 248 | 57, 249 | 98, 250 | 99, 251 | 100, 252 | 101, 253 | 102, 254 | 103, 255 | 104, 256 | 106, 257 | 107, 258 | 109, 259 | 110, 260 | 112, 261 | 113, 262 | 114, 263 | 115, 264 | 116, 265 | 117, 266 | 118, 267 | 119, 268 | 120, 269 | 121, 270 | 122, 271 | ], 272 | dtype="uint8", 273 | ) 274 | 275 | encoded_base32 = replacement[encoded_base32] 276 | 277 | encoded_base32 = encoded_base32.view(np.dtype("|S12")) 278 | return encoded_base32.flatten().astype(f"U{precision}") 279 | -------------------------------------------------------------------------------- /dask_geopandas/hilbert_distance.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def _hilbert_distance(gdf, total_bounds=None, level=16): 8 | """ 9 | Calculate the distance along a Hilbert curve. 10 | 11 | The distances are calculated for the midpoints of the geometries in the 12 | GeoDataFrame. 13 | 14 | Parameters 15 | ---------- 16 | gdf : GeoDataFrame 17 | total_bounds : 4-element array 18 | Total bounds of geometries - array 19 | level : int (1 - 16), default 16 20 | Determines the precision of the curve (points on the curve will 21 | have coordinates in the range [0, 2^level - 1]). 
22 | 23 | Returns 24 | --------- 25 | Pandas Series containing distances along the Hilbert curve 26 | 27 | """ 28 | with warnings.catch_warnings(): 29 | warnings.filterwarnings( 30 | "ignore", "GeoSeries.isna() previously returned True", UserWarning 31 | ) 32 | if gdf.is_empty.any() | gdf.geometry.isna().any(): 33 | raise ValueError( 34 | "Hilbert distance cannot be computed on a GeoSeries with empty or " 35 | "missing geometries.", 36 | ) 37 | # Calculate bounds as numpy array 38 | bounds = gdf.bounds.to_numpy() 39 | 40 | # Calculate discrete coords based on total bounds and bounds 41 | x, y = _continuous_to_discrete_coords(bounds, level, total_bounds) 42 | # Compute distance along hilbert curve 43 | distances = _encode(level, x, y) 44 | 45 | return pd.Series(distances, index=gdf.index, name="hilbert_distance") 46 | 47 | 48 | def _continuous_to_discrete_coords(bounds, level, total_bounds): 49 | """ 50 | Calculates mid points & ranges of geoms and returns 51 | as discrete coords 52 | 53 | Parameters 54 | ---------- 55 | 56 | bounds : Bounds of each geometry - array 57 | 58 | p : The number of iterations used in constructing the Hilbert curve 59 | 60 | total_bounds : Total bounds of geometries - array 61 | 62 | Returns 63 | --------- 64 | Discrete two-dimensional numpy array 65 | Two-dimensional array Array of hilbert distances for each geom 66 | 67 | """ 68 | # Hilbert Side length 69 | side_length = (2**level) - 1 70 | 71 | # Calculate mid points for x and y bound coords - returns array 72 | x_mids = (bounds[:, 0] + bounds[:, 2]) / 2.0 73 | y_mids = (bounds[:, 1] + bounds[:, 3]) / 2.0 74 | 75 | # Calculate x and y range of total bound coords - returns array 76 | if total_bounds is None: 77 | total_bounds = np.array( 78 | (np.nanmin(x_mids), np.nanmin(y_mids), np.nanmax(x_mids), np.nanmax(y_mids)) 79 | ) 80 | 81 | xmin, ymin, xmax, ymax = total_bounds 82 | 83 | # Transform continuous value to discrete integer for each dimension 84 | x_int = _continuous_to_discrete(x_mids, (xmin, xmax), side_length) 85 | y_int = _continuous_to_discrete(y_mids, (ymin, ymax), side_length) 86 | 87 | return x_int, y_int 88 | 89 | 90 | def _continuous_to_discrete(vals, val_range, n): 91 | """ 92 | Convert a continuous one-dimensional array to discrete integer values 93 | based their ranges 94 | 95 | Parameters 96 | ---------- 97 | vals : Array of continuous values 98 | 99 | val_range : Tuple containing range of continuous values 100 | 101 | n : Number of discrete values 102 | 103 | Returns 104 | --------- 105 | One-dimensional array of discrete ints 106 | 107 | """ 108 | width = val_range[1] - val_range[0] 109 | res = (vals - val_range[0]) * (n / width) 110 | 111 | np.clip(res, 0, n, out=res) 112 | return res.astype(np.uint32) 113 | 114 | 115 | # Fast Hilbert curve algorithm by http://threadlocalmutex.com/ 116 | # From C++ https://github.com/rawrunprotected/hilbert_curves 117 | # (public domain) 118 | 119 | 120 | MAX_LEVEL = 16 121 | 122 | 123 | def _interleave(x): 124 | x = (x | (x << 8)) & 0x00FF00FF 125 | x = (x | (x << 4)) & 0x0F0F0F0F 126 | x = (x | (x << 2)) & 0x33333333 127 | x = (x | (x << 1)) & 0x55555555 128 | return x 129 | 130 | 131 | def _encode(level, x, y): 132 | 133 | x = np.asarray(x, dtype="uint32") 134 | y = np.asarray(y, dtype="uint32") 135 | 136 | if level > MAX_LEVEL: 137 | raise ValueError("Level out of range") 138 | 139 | x = x << (16 - level) 140 | y = y << (16 - level) 141 | 142 | # Initial prefix scan round, prime with x and y 143 | a = x ^ y 144 | b = 0xFFFF ^ a 145 | c = 0xFFFF ^ (x | 
y) 146 | d = x & (y ^ 0xFFFF) 147 | 148 | A = a | (b >> 1) 149 | B = (a >> 1) ^ a 150 | C = ((c >> 1) ^ (b & (d >> 1))) ^ c 151 | D = ((a & (c >> 1)) ^ (d >> 1)) ^ d 152 | 153 | a = A.copy() 154 | b = B.copy() 155 | c = C.copy() 156 | d = D.copy() 157 | 158 | A = (a & (a >> 2)) ^ (b & (b >> 2)) 159 | B = (a & (b >> 2)) ^ (b & ((a ^ b) >> 2)) 160 | C ^= (a & (c >> 2)) ^ (b & (d >> 2)) 161 | D ^= (b & (c >> 2)) ^ ((a ^ b) & (d >> 2)) 162 | 163 | a = A.copy() 164 | b = B.copy() 165 | c = C.copy() 166 | d = D.copy() 167 | 168 | A = (a & (a >> 4)) ^ (b & (b >> 4)) 169 | B = (a & (b >> 4)) ^ (b & ((a ^ b) >> 4)) 170 | C ^= (a & (c >> 4)) ^ (b & (d >> 4)) 171 | D ^= (b & (c >> 4)) ^ ((a ^ b) & (d >> 4)) 172 | 173 | # Final round and projection 174 | a = A.copy() 175 | b = B.copy() 176 | c = C.copy() 177 | d = D.copy() 178 | 179 | C ^= (a & (c >> 8)) ^ (b & (d >> 8)) 180 | D ^= (b & (c >> 8)) ^ ((a ^ b) & (d >> 8)) 181 | 182 | # Undo transformation prefix scan 183 | a = C ^ (C >> 1) 184 | b = D ^ (D >> 1) 185 | 186 | # Recover index bits 187 | i0 = x ^ y 188 | i1 = b | (0xFFFF ^ (i0 | a)) 189 | 190 | return ((_interleave(i1) << 1) | _interleave(i0)) >> (32 - 2 * level) 191 | -------------------------------------------------------------------------------- /dask_geopandas/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/io/__init__.py -------------------------------------------------------------------------------- /dask_geopandas/io/arrow.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import math 4 | from packaging.version import Version 5 | from typing import TYPE_CHECKING 6 | 7 | import pandas as pd 8 | from fsspec.core import get_fs_token_paths 9 | 10 | import dask 11 | from dask.base import compute_as_if_collection, tokenize 12 | from dask.dataframe import Scalar, from_graph 13 | from dask.highlevelgraph import HighLevelGraph 14 | from dask.layers import DataFrameIOLayer 15 | from dask.utils import apply, natural_sort_key 16 | 17 | import geopandas 18 | import shapely.geometry 19 | 20 | DASK_2022_12_0_PLUS = Version(dask.__version__) >= Version("2022.12.0") 21 | DASK_2023_04_0 = Version(dask.__version__) >= Version("2023.4.0") 22 | 23 | 24 | if TYPE_CHECKING: 25 | import pyarrow 26 | 27 | 28 | def _update_meta_to_geodataframe(meta, schema_metadata): 29 | """ 30 | Convert meta to a GeoDataFrame and update with potential GEO metadata 31 | """ 32 | if schema_metadata and b"geo" in schema_metadata: 33 | geo_meta = json.loads(schema_metadata[b"geo"]) 34 | geometry_column_name = geo_meta["primary_column"] 35 | crs = geo_meta["columns"][geometry_column_name].get("crs", "OGC:CRS84") 36 | geometry_columns = geo_meta["columns"] 37 | else: 38 | # TODO we could allow the user to pass those explicitly if not 39 | # stored in the metadata 40 | raise ValueError( 41 | "Missing geo metadata in the Parquet/Feather file. " 42 | "Use dask.dataframe.read_parquet/pandas.read_feather() instead." 
43 | ) 44 | 45 | # Update meta to be a GeoDataFrame 46 | meta = geopandas.GeoDataFrame(meta, geometry=geometry_column_name, crs=crs) 47 | for col, item in geometry_columns.items(): 48 | if not col == meta._geometry_column_name: 49 | meta[col] = geopandas.GeoSeries(meta[col], crs=item.get("crs", "OGC:CRS84")) 50 | 51 | return meta 52 | 53 | 54 | def _get_partition_bounds(schema_metadata): 55 | """ 56 | Get the partition bounds, if available, for the dataset fragment. 57 | """ 58 | if not (schema_metadata and b"geo" in schema_metadata): 59 | return None 60 | 61 | metadata = json.loads(schema_metadata[b"geo"].decode("utf-8")) 62 | 63 | # for now only check the primary column (TODO generalize this to follow 64 | # the logic of geopandas to fallback to other geometry columns) 65 | geometry = metadata["primary_column"] 66 | bbox = metadata["columns"][geometry].get("bbox", None) 67 | if bbox is None or all(math.isnan(val) for val in bbox): 68 | return None 69 | return shapely.geometry.box(*bbox) 70 | 71 | 72 | class ArrowDatasetEngine: 73 | """ 74 | Custom IO engine based on pyarrow.dataset. 75 | 76 | This is designed after dask's ArrowDatasetEngine for Parquet IO (but simpler 77 | with less options, and not dealing with a legacy engine) and ArrowORCEngine 78 | for ORC IO (but using pyarrow.dataset for the read_metadata discovery). 79 | """ 80 | 81 | file_format: str 82 | 83 | @classmethod 84 | def read_metadata(cls, fs, paths, columns, filters, index): 85 | import pyarrow.dataset as ds 86 | from pyarrow.parquet import _filters_to_expression 87 | 88 | # dataset discovery 89 | if len(paths) == 1: 90 | # list of 1 directory path is not supported 91 | paths = paths[0] 92 | dataset = ds.dataset( 93 | paths, partitioning="hive", filesystem=fs, format=cls.file_format 94 | ) 95 | 96 | # Get all (filtered) fragments 97 | if filters is not None: 98 | filter = _filters_to_expression(filters) 99 | else: 100 | filter = None 101 | 102 | fragments = list(dataset.get_fragments(filter=filter)) 103 | 104 | # numeric rather than glob ordering 105 | # TODO how does this handle different partitioned directories? 
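        # Note: natural_sort_key orders paths numerically rather than
        # lexicographically, e.g. "part.2.feather" sorts before
        # "part.10.feather", so partitions keep the order in which
        # to_feather wrote them.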
106 | fragments = sorted(fragments, key=lambda f: natural_sort_key(f.path)) 107 | 108 | # TODO potential splitting / aggregating of fragments 109 | 110 | # Create dask meta 111 | schema = dataset.schema 112 | # TODO add support for `categories`keyword 113 | meta = schema.empty_table().to_pandas() 114 | 115 | if index: 116 | meta = meta.set_index(index) 117 | 118 | if columns is not None: 119 | ex = set(columns) - set(meta.columns) 120 | if ex: 121 | raise ValueError( 122 | f"Requested columns {ex} not in schema {set(meta.columns)}" 123 | ) 124 | meta = meta[columns] 125 | 126 | return fragments, meta, schema, filter 127 | 128 | @classmethod 129 | def _arrow_table_to_pandas( 130 | cls, arrow_table: "pyarrow.Table", categories, **kwargs 131 | ) -> pd.DataFrame: 132 | _kwargs = kwargs.get("arrow_to_pandas", {}) 133 | _kwargs.update({"use_threads": False, "ignore_metadata": False}) 134 | 135 | return arrow_table.to_pandas(categories=categories, **_kwargs) 136 | 137 | @classmethod 138 | def read_partition(cls, fs, fragment, schema, columns, filter, **kwargs): 139 | table = fragment.to_table( 140 | schema=schema, columns=columns, filter=filter, use_threads=False 141 | ) 142 | df = cls._arrow_table_to_pandas(table, None) 143 | return df 144 | 145 | @classmethod 146 | def write_partition(cls, df, path, fs, filename, **kwargs): 147 | from pyarrow import feather 148 | 149 | table = cls._pandas_to_arrow_table(df, preserve_index=None) 150 | # TODO using the datasets API could automatically support partitioning 151 | # on columns 152 | with fs.open(fs.sep.join([path, filename]), "wb") as f: 153 | feather.write_feather(table, f) 154 | 155 | 156 | class GeoDatasetEngine: 157 | """ 158 | Mixin to combine with an IO Engine (the custom engine defined above for 159 | Feather IO, or dask's engine for Parquet IO) that holds the custom logic 160 | for geospatial data: overriding the arrow <-> pandas conversions to ensure 161 | we read/write GeoDataFrames. 162 | 163 | """ 164 | 165 | @classmethod 166 | def _arrow_table_to_pandas( 167 | cls, arrow_table: "pyarrow.Table", categories, **kwargs 168 | ) -> pd.DataFrame: 169 | from geopandas.io.arrow import _arrow_to_geopandas 170 | 171 | _kwargs = kwargs.get("arrow_to_pandas", {}) 172 | _kwargs.update({"use_threads": False, "ignore_metadata": False}) 173 | 174 | # TODO support additional keywords 175 | try: 176 | return _arrow_to_geopandas(arrow_table) 177 | except ValueError as err: 178 | # when no geometry column is selected, the above will error. 179 | # We want to fallback to reading it as a plain dask object, because 180 | # the column selection can be an automatic pushdown (eg `ddf['col']`) 181 | # TODO more robust detection of when to fall back? 
182 | if "No geometry columns are included" in str(err): 183 | return super()._arrow_table_to_pandas( 184 | arrow_table, categories=categories, **kwargs 185 | ) 186 | # when there are no columns, we also fall back (the dataset might 187 | # have no files, and so we don't want to raise a confusing error 188 | # about no geometry column) 189 | elif not arrow_table.schema.names: 190 | return super()._arrow_table_to_pandas( 191 | arrow_table, categories=categories, **kwargs 192 | ) 193 | else: 194 | raise 195 | 196 | @classmethod 197 | def _pandas_to_arrow_table( 198 | cls, df: pd.DataFrame, preserve_index=False, schema=None, **kwargs 199 | ) -> "pyarrow.Table": 200 | from geopandas.io.arrow import _geopandas_to_arrow 201 | 202 | table = _geopandas_to_arrow(df, index=preserve_index) 203 | 204 | if schema is not None: 205 | if not table.schema.equals(schema): 206 | # table.schema.metadata contains the "geo" metadata, so 207 | # ensure to preserve this in the cast operation 208 | if table.schema.metadata and not schema.metadata: 209 | schema = schema.with_metadata(table.schema.metadata) 210 | table = table.cast(schema) 211 | 212 | return table 213 | 214 | 215 | class FeatherDatasetEngine(GeoDatasetEngine, ArrowDatasetEngine): 216 | file_format = "feather" 217 | 218 | 219 | class FeatherFunctionWrapper: 220 | """ 221 | Feather Function-Wrapper Class 222 | Reads Feather data from disk to produce a partition. 223 | """ 224 | 225 | def __init__(self, engine, fs, columns, filter, schema, index): 226 | self.engine = engine 227 | self.fs = fs 228 | self.columns = columns 229 | self.filter = filter 230 | self.schema = schema 231 | self.index = index 232 | 233 | def project_columns(self, columns): 234 | """Return a new FeatherFunctionWrapper object with 235 | a sub-column projection. 236 | """ 237 | if columns == self.columns: 238 | return self 239 | func = copy.deepcopy(self) 240 | func.columns = columns 241 | return func 242 | 243 | def __call__(self, parts): 244 | _df = self.engine.read_partition( 245 | self.fs, parts, self.schema, self.columns, self.filter 246 | ) 247 | if self.index: 248 | _df.set_index(self.index, inplace=True) 249 | return _df 250 | 251 | 252 | def read_feather( 253 | path, 254 | columns=None, 255 | filters=None, 256 | index=None, 257 | storage_options=None, 258 | ): 259 | """Read a Feather dataset into a Dask-GeoPandas DataFrame. 260 | 261 | Parameters 262 | ---------- 263 | path: str or list(str) 264 | Source directory for data, or path(s) to individual Feather files. 265 | Paths can be a full URL with protocol specifier, and may include 266 | glob character if a single string. 267 | columns: None or list(str) 268 | Columns to load. If None, loads all. 269 | filters : list (of list) of tuples or pyarrow.dataset.Expression, default None 270 | Row-wise filter to apply while reading the dataset. Can be specified 271 | as a ``pyarrow.dataset.Expression`` object or using a list of tuples 272 | notation, like ``[[('col1', '==', 0), ...], ...]``. The filter will 273 | be applied both at the partition level, this is to prevent the loading 274 | of some files, as at the file level to filter the actual rows. 275 | 276 | For the list of tuples format, predicates can be expressed in disjunctive 277 | normal form (DNF). This means that the innermost tuple describes a single 278 | column predicate. These inner predicates are combined with an AND 279 | conjunction into a larger predicate. The outer-most list then combines all 280 | of the combined filters with an OR disjunction. 
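
        For example (the column names here are purely illustrative),
        ``[[("year", ">", 2000), ("country", "==", "US")]]`` keeps rows that
        match both conditions, while
        ``[[("year", ">", 2000)], [("country", "==", "US")]]`` keeps rows that
        match either one.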
281 | 282 | Predicates can also be expressed as a List[Tuple]. These are evaluated 283 | as an AND conjunction. To express OR in predictates, one must use the 284 | List[List[Tuple]] notation. 285 | index : str, list or False, default None 286 | Field name(s) to use as the output frame index. By default will be 287 | inferred from the pandas metadata (if present in the files). Use False 288 | to read all fields as columns. 289 | storage_options : dict, default None 290 | Key/value pairs to be passed on to the file-system backend, if any 291 | (inferred from the path, such as "s3://..."). 292 | Please see ``fsspec`` for more details. 293 | 294 | Returns 295 | ------- 296 | dask_geopandas.GeoDataFrame (even if there is only one column) 297 | 298 | """ 299 | if index is False: 300 | raise NotImplementedError("Specifying index=False is not yet implemented") 301 | 302 | # Get engine 303 | engine = FeatherDatasetEngine 304 | 305 | # Process file path(s) 306 | storage_options = storage_options or {} 307 | fs, _, paths = get_fs_token_paths(path, mode="rb", storage_options=storage_options) 308 | paths = sorted(paths, key=natural_sort_key) # numeric rather than glob ordering 309 | 310 | # Let backend engine generate a list of parts from the dataset metadata 311 | parts, meta, schema, filter = engine.read_metadata( 312 | fs, 313 | paths, 314 | columns, 315 | filters, 316 | index, 317 | ) 318 | 319 | # Update meta to be a GeoDataFrame 320 | meta = _update_meta_to_geodataframe(meta, schema.metadata) 321 | 322 | # Construct spatial partitioning information, if available 323 | spatial_partitions = geopandas.GeoSeries( 324 | [_get_partition_bounds(frag.physical_schema.metadata) for frag in parts], 325 | crs=meta.crs, 326 | ) 327 | if spatial_partitions.isna().any(): 328 | spatial_partitions = None 329 | 330 | # Construct and return a Blockwise layer 331 | label = "read-feather-" 332 | output_name = label + tokenize(path, columns, filters, index) 333 | layer = DataFrameIOLayer( 334 | output_name, 335 | columns, 336 | parts, 337 | FeatherFunctionWrapper(engine, fs, columns, filter, schema, index), 338 | label=label, 339 | ) 340 | graph = HighLevelGraph({output_name: layer}, {output_name: set()}) 341 | result = from_graph( 342 | graph, 343 | meta, 344 | [None] * (len(parts) + 1), 345 | [(output_name, i) for i in range(len(parts))], 346 | "read_feather", 347 | ) 348 | 349 | result.spatial_partitions = spatial_partitions 350 | return result 351 | 352 | 353 | def to_feather( 354 | df, 355 | path, 356 | write_index=True, 357 | storage_options=None, 358 | compute=True, 359 | compute_kwargs=None, 360 | ): 361 | """Store Dask.dataframe to Feather files 362 | 363 | Notes 364 | ----- 365 | Each partition will be written to a separate file. 366 | 367 | Parameters 368 | ---------- 369 | df : dask_geopandas.GeoDataFrame 370 | path : string or pathlib.Path 371 | Destination directory for data. Prepend with protocol like ``s3://`` 372 | or ``hdfs://`` for remote data. 373 | write_index : boolean, default True 374 | Whether or not to write the index. Defaults to True. 375 | storage_options : dict, default None 376 | Key/value pairs to be passed on to the file-system backend, if any 377 | (inferred from the path, such as "s3://..."). 378 | Please see ``fsspec`` for more details. 379 | compute : bool, default True 380 | If True (default) then the result is computed immediately. If False 381 | then a ``dask.delayed`` object is returned for future computation. 
382 | compute_kwargs : dict, default True 383 | Options to be passed in to the compute method 384 | 385 | See Also 386 | -------- 387 | dask_geopandas.read_feather: Read Feather data to dask.dataframe 388 | """ 389 | # based on the to_orc function from dask 390 | 391 | # Get engine 392 | engine = FeatherDatasetEngine 393 | 394 | # Process file path 395 | storage_options = storage_options or {} 396 | fs, _, _ = get_fs_token_paths(path, mode="wb", storage_options=storage_options) 397 | # Trim any protocol information from the path before forwarding 398 | path = fs._strip_protocol(path) 399 | 400 | if not write_index: 401 | # Not writing index - might as well drop it 402 | df = df.reset_index(drop=True) 403 | 404 | # Use df.npartitions to define file-name list 405 | fs.mkdirs(path, exist_ok=True) 406 | filenames = [f"part.{i}.feather" for i in range(df.npartitions)] 407 | 408 | # Construct IO graph 409 | dsk = {} 410 | name = "to-feather-" + tokenize(df, fs, path, write_index, storage_options) 411 | part_tasks = [] 412 | for d, filename in enumerate(filenames): 413 | dsk[(name, d)] = ( 414 | apply, 415 | engine.write_partition, 416 | [ 417 | (df._name, d), 418 | path, 419 | fs, 420 | filename, 421 | ], 422 | ) 423 | part_tasks.append((name, d)) 424 | dsk[name] = (lambda x: None, part_tasks) 425 | graph = HighLevelGraph.from_collections(name, dsk, dependencies=[df]) 426 | 427 | # Compute or return future 428 | if compute: 429 | if compute_kwargs is None: 430 | compute_kwargs = dict() 431 | from dask_geopandas import GeoDataFrame 432 | 433 | return compute_as_if_collection( 434 | GeoDataFrame, graph, part_tasks, **compute_kwargs 435 | ) 436 | return Scalar(graph, name, "") 437 | -------------------------------------------------------------------------------- /dask_geopandas/io/file.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | 3 | from pandas import RangeIndex 4 | 5 | from dask.base import tokenize 6 | from dask.dataframe import from_graph 7 | from dask.highlevelgraph import HighLevelGraph 8 | 9 | 10 | class FileFunctionWrapper: 11 | """ 12 | GDAL File reader Function-Wrapper Class 13 | 14 | Reads data from disk to produce a partition (given row subset to read). 15 | """ 16 | 17 | def __init__(self, layer, columns): 18 | self.layer = layer 19 | self.columns = columns 20 | self.read_geometry = True 21 | if columns is not None and "geometry" not in columns: 22 | self.read_geometry = False 23 | 24 | def project_columns(self, columns): 25 | """Return a new FileFunctionWrapper object with 26 | a sub-column projection. 27 | """ 28 | if columns == self.columns: 29 | return self 30 | return FileFunctionWrapper(self.layer, columns) 31 | 32 | def __call__(self, part): 33 | path, row_offset, batch_size = part 34 | 35 | import pyogrio 36 | 37 | df = pyogrio.read_dataframe( 38 | path, 39 | layer=self.layer, 40 | columns=self.columns, 41 | read_geometry=self.read_geometry, 42 | skip_features=row_offset, 43 | max_features=batch_size, 44 | ) 45 | df.index = RangeIndex(row_offset, row_offset + batch_size) 46 | return df 47 | 48 | 49 | def read_file( 50 | path, npartitions=None, chunksize=None, layer=None, columns=None, **kwargs 51 | ): 52 | """ 53 | Read a GIS file into a Dask GeoDataFrame. 54 | 55 | This function requires `pyogrio `__. 56 | 57 | Parameters 58 | ---------- 59 | path : str 60 | The absolute or relative path to the file or URL to 61 | be opened. 62 | npartitions : int, optional 63 | The number of partitions to create. 
Either this or `chunksize` should 64 | be specified. 65 | chunksize : int, optional 66 | The number of rows per partition to use. Either this or `npartitions` 67 | should be specified. 68 | layer : int or str, optional (default: first layer) 69 | If an integer is provided, it corresponds to the index of the layer 70 | with the data source. If a string is provided, it must match the name 71 | of the layer in the data source. Defaults to first layer in data source. 72 | columns : list-like, optional (default: all columns) 73 | List of column names to import from the data source. Column names must 74 | exactly match the names in the data source, and will be returned in 75 | the order they occur in the data source. To avoid reading any columns, 76 | pass an empty list-like. 77 | 78 | """ 79 | try: 80 | import pyogrio 81 | except ImportError as err: 82 | raise ImportError( 83 | "The 'read_file' function requires the 'pyogrio' package, but it is " 84 | "not installed or does not import correctly." 85 | f"\nImporting pyogrio resulted in: {err}" 86 | ) 87 | 88 | from dask.layers import DataFrameIOLayer 89 | 90 | # TODO smart inference for a good default partition size ? 91 | if (npartitions is None) == (chunksize is None): 92 | raise ValueError("Exactly one of npartitions and chunksize must be specified.") 93 | 94 | if "skip_features" in kwargs or "max_features" in kwargs: 95 | # TODO we currently use those keywords already for reading in each 96 | # partition (we would need to take those into account for determining 97 | # the part start/ends) 98 | raise ValueError( 99 | "The 'skip_features'/'max_feature' keywords are not yet supported" 100 | ) 101 | if kwargs: 102 | raise ValueError("Additional pyogrio keywords are not yet supported") 103 | 104 | total_size = pyogrio.read_info(path, layer=layer)["features"] 105 | 106 | if chunksize is None: 107 | chunksize = int(ceil(total_size / npartitions)) 108 | 109 | # TODO this could be inferred from read_info ? 
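    # Illustrative arithmetic, not part of the original code: the
    # naturalearth_lowres test layer has 177 features (its divisions in the
    # test suite are (0, 45, 90, 135, 176)), so npartitions=4 gives
    # chunksize = ceil(177 / 4) = 45, and the parts loop below produces row
    # ranges 0-44, 45-89, 90-134 and 135-176, i.e. three partitions of
    # 45 rows and a final partition of 42 rows.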
110 | read_geometry = True 111 | if columns is not None and "geometry" not in columns: 112 | read_geometry = False 113 | meta = pyogrio.read_dataframe( 114 | path, layer=layer, columns=columns, read_geometry=read_geometry, max_features=5 115 | ).head(0) 116 | 117 | # Define parts 118 | parts = [] 119 | row_offset = 0 120 | divs = [row_offset] 121 | 122 | while row_offset < total_size: 123 | batch_size = min(chunksize, total_size - row_offset) 124 | parts.append((path, row_offset, batch_size)) 125 | row_offset += batch_size 126 | divs.append(row_offset) 127 | # Set the last division value to be the largest index value in the last partition 128 | divs[-1] = divs[-1] - 1 129 | 130 | # Create Blockwise layer 131 | label = "read-file-" 132 | output_name = label + tokenize(path, chunksize, layer, columns) 133 | layer = DataFrameIOLayer( 134 | output_name, 135 | columns, 136 | parts, 137 | FileFunctionWrapper(layer, columns), 138 | label=label, 139 | ) 140 | graph = HighLevelGraph({output_name: layer}, {output_name: set()}) 141 | 142 | result = from_graph( 143 | graph, 144 | meta, 145 | divs, 146 | [(output_name, i) for i in range(len(divs) - 1)], 147 | "read_file", 148 | ) 149 | return result 150 | -------------------------------------------------------------------------------- /dask_geopandas/io/parquet.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import dask.dataframe as dd 4 | 5 | import geopandas 6 | 7 | from .arrow import ( 8 | GeoDatasetEngine, 9 | _get_partition_bounds, 10 | _update_meta_to_geodataframe, 11 | ) 12 | 13 | try: 14 | # pyarrow is imported here, but is an optional dependency 15 | from dask.dataframe.io.parquet.arrow import ( 16 | ArrowDatasetEngine as DaskArrowDatasetEngine, 17 | ) 18 | except ImportError: 19 | DaskArrowDatasetEngine = object 20 | 21 | 22 | def _get_partition_bounds_parquet(part, fs): 23 | """ 24 | Based on the part information gathered by dask, get the partition bounds 25 | if available. 26 | 27 | """ 28 | from pyarrow.parquet import ParquetFile 29 | 30 | # read the metadata from the actual file (this is again file IO, but 31 | # we can't rely on the schema metadata, because this is only the 32 | # metadata of the first piece) 33 | pq_metadata = None 34 | if "piece" in part: 35 | path = part["piece"][0] 36 | if isinstance(path, str): 37 | with fs.open(path, "rb") as f: 38 | pq_metadata = ParquetFile(f).metadata 39 | if pq_metadata is None: 40 | return None 41 | 42 | return _get_partition_bounds(pq_metadata.metadata) 43 | 44 | 45 | class GeoArrowEngine(GeoDatasetEngine, DaskArrowDatasetEngine): 46 | """ 47 | Engine for reading geospatial Parquet datasets. Subclasses dask's 48 | ArrowEngine for Parquet, but overriding some methods to ensure we 49 | correctly read/write GeoDataFrames. 50 | 51 | """ 52 | 53 | @classmethod 54 | def _update_meta(cls, meta, schema): 55 | """ 56 | Convert meta to a GeoDataFrame and update with potential GEO metadata 57 | """ 58 | return _update_meta_to_geodataframe(meta, schema.metadata) 59 | 60 | @classmethod 61 | def _create_dd_meta(cls, dataset_info): 62 | meta = super()._create_dd_meta(dataset_info) 63 | schema = dataset_info["schema"] 64 | if not schema.names and not schema.metadata: 65 | if len(list(dataset_info["ds"].get_fragments())) == 0: 66 | raise ValueError( 67 | "No dataset parts discovered. 
Use dask.dataframe.read_parquet " 68 | "to read it as an empty DataFrame" 69 | ) 70 | meta = cls._update_meta(meta, schema) 71 | 72 | if dataset_info["kwargs"].get("gather_spatial_partitions", True): 73 | fs = dataset_info["fs"] 74 | parts, _, _ = cls._construct_collection_plan(dataset_info) 75 | regions = geopandas.GeoSeries( 76 | [_get_partition_bounds_parquet(part, fs) for part in parts], 77 | crs=meta.crs, 78 | ) 79 | if regions.notna().all(): 80 | # a bit hacky, but this allows us to get this passed through 81 | meta.attrs["spatial_partitions"] = regions 82 | 83 | return meta 84 | 85 | 86 | to_parquet = partial(dd.to_parquet, engine=GeoArrowEngine) 87 | to_parquet.__doc__ = dd.to_parquet.__doc__ 88 | 89 | 90 | def read_parquet(*args, **kwargs): 91 | from dask.dataframe import read_parquet 92 | 93 | result = read_parquet(*args, engine=GeoArrowEngine, **kwargs) 94 | # check if spatial partitioning information was stored 95 | spatial_partitions = result._meta.attrs.get("spatial_partitions", None) 96 | 97 | result = dd.from_graph( 98 | result.dask, 99 | result._meta, 100 | result.divisions, 101 | result.__dask_keys__(), 102 | "read_parquet", 103 | ) 104 | 105 | result.spatial_partitions = spatial_partitions 106 | return result 107 | 108 | 109 | read_parquet.__doc__ = dd.read_parquet.__doc__ 110 | -------------------------------------------------------------------------------- /dask_geopandas/morton_distance.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pandas as pd 4 | 5 | from dask_geopandas.hilbert_distance import _continuous_to_discrete_coords 6 | 7 | 8 | def _morton_distance(gdf, total_bounds, level): 9 | """ 10 | Calculate distance of geometries along Morton curve 11 | 12 | The Morton curve is also known as Z-order https://en.wikipedia.org/wiki/Z-order_curve 13 | 14 | Parameters 15 | ---------- 16 | gdf : GeoDataFrame 17 | total_bounds : array_like 18 | array containing xmin, ymin, xmax, ymax 19 | level : int (1 - 16) 20 | Determines the precision of the Morton curve. 
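    For illustration: a geometry whose discretized mid-point coordinates are
    x = 3 (binary 011) and y = 5 (binary 101) is assigned the Z-order code
    0b100111 = 39, since ``_distances_from_coordinates`` below interleaves the
    x bits into the even bit positions and the y bits into the odd bit positions.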
21 | 22 | Returns 23 | ------- 24 | type : pandas.Series 25 | Series containing distances from Morton curve 26 | 27 | """ 28 | with warnings.catch_warnings(): 29 | warnings.filterwarnings( 30 | "ignore", "GeoSeries.isna() previously returned True", UserWarning 31 | ) 32 | if gdf.is_empty.any() | gdf.geometry.isna().any(): 33 | raise ValueError( 34 | "Morton distance cannot be computed on a GeoSeries with empty or " 35 | "missing geometries.", 36 | ) 37 | # Calculate bounds as numpy array 38 | bounds = gdf.bounds.to_numpy() 39 | # Calculate discrete coords based on total bounds and bounds 40 | x_int, y_int = _continuous_to_discrete_coords(bounds, level, total_bounds) 41 | # Calculate distance from morton curve 42 | distances = _distances_from_coordinates(x_int, y_int) 43 | 44 | return pd.Series(distances, index=gdf.index, name="morton_distance") 45 | 46 | 47 | def _distances_from_coordinates(x, y): 48 | """ 49 | Calculate distances from geometry mid-points along Morton curve 50 | 51 | Parameters 52 | ---------- 53 | x, y : array_like 54 | x, y coordinate pairs based on mid-points of geoms 55 | 56 | Returns 57 | ------- 58 | type : int 59 | Integer distances from Morton curve 60 | """ 61 | 62 | return _part1by1(x) | (_part1by1(y) << 1) 63 | 64 | 65 | def _part1by1(n): 66 | """ 67 | Interleave bits by ninary magic numbers 68 | 69 | Based on #http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN 70 | 71 | Parameters 72 | ---------- 73 | n : np.array 74 | X or Y coordinates 75 | 76 | Returns 77 | ------- 78 | n : int 79 | Interleaved bits 80 | """ 81 | n &= 0x0000FFFF 82 | n = (n | (n << 8)) & 0x00FF00FF 83 | n = (n | (n << 4)) & 0x0F0F0F0F 84 | n = (n | (n << 2)) & 0x33333333 85 | n = (n | (n << 1)) & 0x55555555 86 | 87 | return n 88 | -------------------------------------------------------------------------------- /dask_geopandas/sjoin.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import numpy as np 4 | 5 | from dask.base import tokenize 6 | from dask.dataframe import from_graph 7 | from dask.highlevelgraph import HighLevelGraph 8 | 9 | import geopandas 10 | 11 | from .expr import from_geopandas 12 | 13 | 14 | def sjoin(left, right, how="inner", predicate="intersects", **kwargs): 15 | """ 16 | Spatial join of two GeoDataFrames. 17 | 18 | Parameters 19 | ---------- 20 | left, right : geopandas or dask_geopandas GeoDataFrames 21 | If a geopandas.GeoDataFrame is passed, it is considered as a 22 | dask_geopandas.GeoDataFrame with 1 partition (without spatial 23 | partitioning information). 24 | how : string, default 'inner' 25 | The type of join. Currently only 'inner' is supported. 26 | predicate : string, default 'intersects' 27 | Binary predicate how to match corresponding rows of the left and right 28 | GeoDataFrame. Possible values: 'contains', 'contains_properly', 29 | 'covered_by', 'covers', 'crosses', 'intersects', 'overlaps', 30 | 'touches', 'within'. 31 | 32 | Returns 33 | ------- 34 | dask_geopandas.GeoDataFrame 35 | 36 | Notes 37 | ----- 38 | If both the left and right GeoDataFrame have spatial partitioning 39 | information available (the ``spatial_partitions`` attribute is set), 40 | the output partitions are determined based on intersection of the 41 | spatial partitions. In all other cases, the output partitions are 42 | all combinations (cartesian/cross product) of all input partition 43 | of the left and right GeoDataFrame. 
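    For example, a left frame with 3 partitions joined to a right frame with
    4 partitions and no spatial partitioning information yields 3 x 4 = 12
    partition-pair joins, whereas with known spatial partitions only the pairs
    whose partition extents actually intersect are evaluated.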
44 | """ 45 | if "op" in kwargs: 46 | predicate = kwargs.pop("op") 47 | deprecation_message = ( 48 | "The `op` parameter is deprecated and will be removed" 49 | " in a future release. Please use the `predicate` parameter" 50 | " instead." 51 | ) 52 | warnings.warn(deprecation_message, FutureWarning, stacklevel=2) 53 | if how != "inner": 54 | raise NotImplementedError("Only how='inner' is supported right now") 55 | 56 | if isinstance(left, geopandas.GeoDataFrame): 57 | left = from_geopandas(left, npartitions=1) 58 | if isinstance(right, geopandas.GeoDataFrame): 59 | right = from_geopandas(right, npartitions=1) 60 | 61 | # We call optimize on the inputs to ensure that any optimizations 62 | # done by dask-expr (which might change the expression, and thus the 63 | # name of the DataFrame) *before* we build the HighLevelGraph. 64 | # https://github.com/dask/dask-expr/issues/1129 65 | left = left.optimize() 66 | right = right.optimize() 67 | 68 | name = "sjoin-" + tokenize(left, right, how, predicate) 69 | meta = geopandas.sjoin(left._meta, right._meta, how=how, predicate=predicate) 70 | 71 | if left.spatial_partitions is not None and right.spatial_partitions is not None: 72 | # Spatial partitions are known -> use them to trim down the list of 73 | # partitions that need to be joined 74 | parts = geopandas.sjoin( 75 | left.spatial_partitions.to_frame("geometry"), 76 | right.spatial_partitions.to_frame("geometry"), 77 | how="inner", 78 | predicate="intersects", 79 | ) 80 | parts_left = np.asarray(parts.index).tolist() 81 | parts_right = np.asarray(parts["index_right"].values).tolist() 82 | using_spatial_partitions = True 83 | else: 84 | # Unknown spatial partitions -> full cartesian (cross) product of all 85 | # combinations of the partitions of the left and right dataframe 86 | n_left = left.npartitions 87 | n_right = right.npartitions 88 | parts_left = np.repeat(np.arange(n_left), n_right) 89 | parts_right = np.tile(np.arange(n_right), n_left) 90 | using_spatial_partitions = False 91 | 92 | dsk = {} 93 | new_spatial_partitions = [] 94 | for i, (part_left, part_right) in enumerate(zip(parts_left, parts_right)): 95 | dsk[(name, i)] = ( 96 | geopandas.sjoin, 97 | (left._name, part_left), 98 | (right._name, part_right), 99 | how, 100 | predicate, 101 | ) 102 | # TODO preserve spatial partitions of the output if only left has spatial 103 | # partitions 104 | if using_spatial_partitions: 105 | lr = left.spatial_partitions.iloc[part_left] 106 | rr = right.spatial_partitions.iloc[part_right] 107 | # extent = lr.intersection(rr).buffer(buffer).intersection(lr.union(rr)) 108 | extent = lr.intersection(rr) 109 | new_spatial_partitions.append(extent) 110 | 111 | divisions = [None] * (len(dsk) + 1) 112 | graph = HighLevelGraph.from_collections(name, dsk, dependencies=[left, right]) 113 | if using_spatial_partitions: 114 | new_spatial_partitions = geopandas.GeoSeries( 115 | data=new_spatial_partitions, crs=left.crs 116 | ) 117 | else: 118 | new_spatial_partitions = None 119 | 120 | result = from_graph(graph, meta, divisions, dsk.keys(), "sjoin") 121 | result.spatial_partitions = new_spatial_partitions 122 | return result 123 | -------------------------------------------------------------------------------- /dask_geopandas/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/__init__.py 
-------------------------------------------------------------------------------- /dask_geopandas/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from packaging.version import Version 3 | 4 | import dask 5 | 6 | import geopandas 7 | 8 | import pytest 9 | 10 | # TODO update version once geopandas has a proper tag for 1.0 11 | GEOPANDAS_GE_10 = (Version(geopandas.__version__) >= Version("0.14.0+70")) and ( 12 | Version(geopandas.__version__) < Version("0.14.1") 13 | ) 14 | 15 | 16 | # TODO Disable usage of pyarrow strings until the expected results in the tests 17 | # are updated to use those as well 18 | dask.config.set({"dataframe.convert-string": False}) 19 | 20 | 21 | # Datasets used in our tests 22 | 23 | _HERE = os.path.abspath(os.path.dirname(__file__)) 24 | _TEST_DATA_DIR = os.path.join(_HERE, "data") 25 | _NATURALEARTH_CITIES = os.path.join( 26 | _TEST_DATA_DIR, "naturalearth_cities", "naturalearth_cities.shp" 27 | ) 28 | _NATURALEARTH_LOWRES = os.path.join( 29 | _TEST_DATA_DIR, "naturalearth_lowres", "naturalearth_lowres.shp" 30 | ) 31 | 32 | 33 | @pytest.fixture(scope="session") 34 | def naturalearth_lowres() -> str: 35 | # skip if data missing, unless on github actions 36 | if os.path.isfile(_NATURALEARTH_LOWRES) or os.getenv("GITHUB_ACTIONS"): 37 | return _NATURALEARTH_LOWRES 38 | else: 39 | pytest.skip("Naturalearth lowres dataset not found") 40 | 41 | 42 | @pytest.fixture(scope="session") 43 | def naturalearth_cities() -> str: 44 | # skip if data missing, unless on github actions 45 | if os.path.isfile(_NATURALEARTH_CITIES) or os.getenv("GITHUB_ACTIONS"): 46 | return _NATURALEARTH_CITIES 47 | else: 48 | pytest.skip("Naturalearth cities dataset not found") 49 | -------------------------------------------------------------------------------- /dask_geopandas/tests/data/README.md: -------------------------------------------------------------------------------- 1 | # Datasets previously included with geopandas 2 | 3 | - `'naturalearth_cities'`: capital cities, based on http://www.naturalearthdata.com/downloads/10m-cultural-vectors/110m-populated-places/ 4 | - `'naturalearth_lowres'`: country boundaries, based on http://www.naturalearthdata.com/downloads/110m-cultural-vectors/110m-admin-0-countries/ 5 | 6 | -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.VERSION.txt: -------------------------------------------------------------------------------- 1 | 2.0.0 -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.cpg: -------------------------------------------------------------------------------- 1 | ISO-8859-1 -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.dbf -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.prj: -------------------------------------------------------------------------------- 1 | 
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.shp -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/data/naturalearth_cities/naturalearth_cities.shx -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.cpg: -------------------------------------------------------------------------------- 1 | ISO-8859-1 -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.dbf -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.shp -------------------------------------------------------------------------------- /dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/data/naturalearth_lowres/naturalearth_lowres.shx -------------------------------------------------------------------------------- /dask_geopandas/tests/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/dask-geopandas/d60b432f21361516948a70b36ab22b6486c97622/dask_geopandas/tests/io/__init__.py -------------------------------------------------------------------------------- /dask_geopandas/tests/io/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shlex 4 | import subprocess 5 | import sys 6 | import time 7 | from contextlib import contextmanager 8 | 9 | import pytest 10 | 11 | 12 | @contextmanager 13 | def 
ensure_safe_environment_variables(): 14 | """ 15 | Get a context manager to safely set environment variables 16 | 17 | All changes will be undone on close, hence environment variables set 18 | within this contextmanager will neither persist nor change global state. 19 | """ 20 | saved_environ = dict(os.environ) 21 | try: 22 | yield 23 | finally: 24 | os.environ.clear() 25 | os.environ.update(saved_environ) 26 | 27 | 28 | @pytest.fixture(scope="session") 29 | def s3_server(): 30 | """ 31 | Fixture for mocking S3 interaction. 32 | 33 | Sets up moto server in separate process 34 | """ 35 | pytest.importorskip("s3fs") 36 | pytest.importorskip("boto3") 37 | pytest.importorskip("moto", minversion="1.3.14") 38 | pytest.importorskip("flask") # server mode needs flask too 39 | requests = pytest.importorskip("requests") 40 | logging.getLogger("requests").disabled = True 41 | 42 | endpoint_url = "http://127.0.0.1:5555/" 43 | 44 | with ensure_safe_environment_variables(): 45 | os.environ["AWS_ACCESS_KEY_ID"] = "testing" 46 | os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" 47 | os.environ["AWS_SECURITY_TOKEN"] = "testing" 48 | os.environ["AWS_SESSION_TOKEN"] = "testing" 49 | 50 | # Launching moto in server mode, i.e., as a separate process 51 | # with an S3 endpoint on localhost 52 | 53 | # pipe to null to avoid logging in terminal 54 | proc = subprocess.Popen( 55 | shlex.split("moto_server s3 -p 5555"), 56 | stdout=subprocess.DEVNULL, 57 | ) 58 | 59 | timeout = 5 60 | while True: 61 | try: 62 | # OK to go once server is accepting connections 63 | r = requests.get(endpoint_url) 64 | if r.ok: 65 | break 66 | except Exception: 67 | pass 68 | timeout -= 0.1 69 | time.sleep(0.1) 70 | assert timeout > 0, "Timed out waiting for moto server" 71 | yield endpoint_url 72 | 73 | # shut down external process 74 | proc.terminate() 75 | try: 76 | proc.wait(timeout=3) 77 | except subprocess.TimeoutExpired: 78 | proc.kill() 79 | if sys.platform == "win32": 80 | # belt & braces 81 | subprocess.call("TASKKILL /F /PID {pid} /T".format(pid=proc.pid)) 82 | 83 | 84 | @pytest.fixture 85 | def s3_storage_options(): 86 | return {"client_kwargs": {"endpoint_url": "http://127.0.0.1:5555/"}} 87 | 88 | 89 | @pytest.fixture() 90 | def s3_resource(s3_server): 91 | """ 92 | Sets up S3 bucket 'geopandas-test'. 
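    The roundtrip tests below use it together with ``s3_storage_options``,
    e.g. writing with ``ddf.to_parquet("s3://geopandas-test/dataset.parquet",
    storage_options=s3_storage_options)`` and reading the result back via
    ``dask_geopandas.read_parquet`` with the same storage options.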
93 | """ 94 | endpoint_url = s3_server 95 | 96 | import boto3 97 | import s3fs 98 | 99 | bucket = "geopandas-test" 100 | client = boto3.client("s3", endpoint_url=endpoint_url) 101 | 102 | client.create_bucket(Bucket=bucket, ACL="public-read-write") 103 | 104 | fs = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint_url}) 105 | s3fs.S3FileSystem.clear_instance_cache() 106 | fs.invalidate_cache() 107 | 108 | try: 109 | yield fs, endpoint_url 110 | finally: 111 | fs.rm(bucket, recursive=True) 112 | -------------------------------------------------------------------------------- /dask_geopandas/tests/io/test_arrow.py: -------------------------------------------------------------------------------- 1 | import dask.dataframe as dd 2 | 3 | import geopandas 4 | 5 | import dask_geopandas 6 | 7 | import pytest 8 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal 9 | from pandas.testing import assert_index_equal 10 | 11 | pa = pytest.importorskip("pyarrow") 12 | ds = pytest.importorskip("pyarrow.dataset") 13 | 14 | 15 | pytestmark = pytest.mark.filterwarnings( 16 | "ignore:this is an initial implementation:UserWarning" 17 | ) 18 | 19 | 20 | def test_read(tmp_path, naturalearth_lowres): 21 | df = geopandas.read_file(naturalearth_lowres) 22 | 23 | # writing a partitioned dataset with geopandas (to not rely on roundtrip) 24 | basedir = tmp_path / "dataset" 25 | basedir.mkdir() 26 | df.iloc[:100].to_feather(basedir / "data.0.feather") 27 | df.iloc[100:].to_feather(basedir / "data.1.feather") 28 | 29 | result = dask_geopandas.read_feather(basedir) 30 | assert isinstance(result, dask_geopandas.GeoDataFrame) 31 | assert result.npartitions == 2 32 | assert result.crs == df.crs 33 | assert result.spatial_partitions is not None 34 | # TODO this reset_index should not be necessary 35 | result_gpd = result.compute().reset_index(drop=True) 36 | assert_geodataframe_equal(result_gpd, df) 37 | 38 | 39 | def test_write(tmp_path, naturalearth_lowres): 40 | df = geopandas.read_file(naturalearth_lowres) 41 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 42 | 43 | basedir = tmp_path / "dataset" 44 | ddf.to_feather(basedir) 45 | 46 | # each partition (4) is written as a feather file 47 | paths = list(basedir.glob("*.feather")) 48 | assert len(paths) == 4 49 | 50 | # each individual file is a valid feather file 51 | result_part0 = geopandas.read_feather(basedir / "part.0.feather") 52 | result_part0.index.name = None 53 | assert_geodataframe_equal(result_part0, df.iloc[:45]) 54 | 55 | # TODO geopandas doesn't actually support this for "feather" format 56 | # # the written dataset is also readable by plain geopandas 57 | # result_gpd = geopandas.read_feather(basedir) 58 | # # the dataset written by dask has "__null_dask_index__" index column name 59 | # result_gpd.index.name = None 60 | # assert_geodataframe_equal(result_gpd, df) 61 | 62 | 63 | @pytest.mark.xfail # https://github.com/dask/dask/issues/8022 64 | def test_write_delayed(tmp_path, naturalearth_lowres): 65 | df = geopandas.read_file(naturalearth_lowres) 66 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 67 | 68 | basedir = tmp_path / "dataset" 69 | dataset = ddf.to_feather(basedir, compute=False) 70 | dataset.compute() 71 | result = dask_geopandas.read_feather(basedir) 72 | assert result.npartitions == 4 73 | # TODO this reset_index should not be necessary 74 | result_gpd = result.compute().reset_index(drop=True) 75 | assert_geodataframe_equal(result_gpd, df) 76 | 77 | 78 | def 
test_roundtrip(tmp_path, naturalearth_lowres): 79 | # basic roundtrip 80 | df = geopandas.read_file(naturalearth_lowres) 81 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 82 | 83 | basedir = tmp_path / "dataset" 84 | ddf.to_feather(basedir) 85 | 86 | # reading back gives identical GeoDataFrame 87 | result = dask_geopandas.read_feather(basedir) 88 | assert result.npartitions == 4 89 | assert result.crs == df.crs 90 | # TODO this reset_index should not be necessary 91 | result_gpd = result.compute().reset_index(drop=True) 92 | assert_geodataframe_equal(result_gpd, df) 93 | # reading back also populates the spatial partitioning property 94 | ddf.calculate_spatial_partitions() 95 | assert_geoseries_equal(result.spatial_partitions, ddf.spatial_partitions.envelope) 96 | 97 | 98 | def test_roundtrip_s3(s3_resource, s3_storage_options, naturalearth_lowres): 99 | fs, endpoint_url = s3_resource 100 | 101 | # basic roundtrip to S3 102 | df = geopandas.read_file(naturalearth_lowres) 103 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 104 | 105 | uri = "s3://geopandas-test/dataset.feather" 106 | ddf.to_feather(uri, storage_options=s3_storage_options) 107 | 108 | # reading back gives identical GeoDataFrame 109 | result = dask_geopandas.read_feather(uri, storage_options=s3_storage_options) 110 | assert result.npartitions == 4 111 | assert_geodataframe_equal(result.compute().reset_index(drop=True), df) 112 | # reading back correctly sets the CRS in meta 113 | assert result.crs == df.crs 114 | # reading back also populates the spatial partitioning property 115 | assert result.spatial_partitions is not None 116 | 117 | 118 | def test_column_selection_push_down(tmp_path, naturalearth_lowres): 119 | # set up dataset 120 | df = geopandas.read_file(naturalearth_lowres) 121 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 122 | basedir = tmp_path / "dataset" 123 | # TODO awaiting a `to_feather` implementation 124 | # ddf.to_feather(basedir) 125 | basedir.mkdir() 126 | for i, part in enumerate(ddf.partitions): 127 | part.compute().to_feather(basedir / f"part.{i}.feather") 128 | 129 | ddf = dask_geopandas.read_feather(basedir) 130 | 131 | # selecting columns including geometry column still gives GeoDataFrame 132 | ddf_subset = ddf[["pop_est", "geometry"]] 133 | assert type(ddf_subset) is dask_geopandas.GeoDataFrame 134 | # and also preserves the spatial partitioning information 135 | assert ddf_subset.spatial_partitions is not None 136 | 137 | # selecting a single non-geometry column on the dataframe should work 138 | s = ddf["pop_est"] 139 | assert type(s) is dd.Series 140 | assert s.max().compute() == df["pop_est"].max() 141 | 142 | 143 | def test_missing_metadata(tmp_path, naturalearth_lowres): 144 | df = geopandas.read_file(naturalearth_lowres) 145 | path = tmp_path / "test.feather" 146 | 147 | # convert to DataFrame with wkb -> writing to feather will have only pandas metadata 148 | df = df.to_wkb() 149 | df.to_feather(path) 150 | 151 | with pytest.raises(ValueError, match="Missing geo metadata"): 152 | dask_geopandas.read_feather(path) 153 | 154 | # remove metadata completely 155 | from pyarrow import feather 156 | 157 | table = feather.read_table(path) 158 | feather.write_feather(table.replace_schema_metadata(), path) 159 | 160 | with pytest.raises(ValueError, match="Missing geo metadata"): 161 | dask_geopandas.read_feather(path) 162 | 163 | 164 | @pytest.mark.parametrize( 165 | "filter", [[("continent", "=", "Africa")], ds.field("continent") == "Africa"] 166 | ) 167 | def 
test_filters(tmp_path, naturalearth_lowres, filter): 168 | # set up dataset 169 | df = geopandas.read_file(naturalearth_lowres) 170 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 171 | basedir = tmp_path / "dataset" 172 | ddf.to_feather(basedir) 173 | 174 | # specifying filters argument 175 | result = dask_geopandas.read_feather(basedir, filters=filter) 176 | assert result.npartitions == 4 177 | 178 | result_gpd = result.compute().reset_index(drop=True) 179 | expected = df[df["continent"] == "Africa"].reset_index(drop=True) 180 | assert_geodataframe_equal(result_gpd, expected) 181 | 182 | 183 | def test_index(tmp_path, naturalearth_lowres): 184 | # set up dataset 185 | df = geopandas.read_file(naturalearth_lowres) 186 | # get meaningful index by shuffling (hilbert distance) 187 | df = dask_geopandas.from_geopandas(df, npartitions=2).spatial_shuffle().compute() 188 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 189 | 190 | # roundtrip preserves the index by default 191 | basedir = tmp_path / "dataset" 192 | ddf.to_feather(basedir) 193 | result = dask_geopandas.read_feather(basedir) 194 | assert "hilbert_distance" not in result.columns 195 | assert result.index.name == "hilbert_distance" 196 | assert_index_equal(result.index.compute(), df.index) 197 | 198 | # TODO not setting the index 199 | with pytest.raises(NotImplementedError): 200 | result = dask_geopandas.read_feather(basedir, index=False) 201 | # assert "hilbert_distance" in result.columns 202 | # assert result.index.name is None 203 | 204 | # setting specific columns as the index 205 | result = dask_geopandas.read_feather(basedir, index="iso_a3") 206 | assert "iso_a3" not in result.columns 207 | assert result.index.name == "iso_a3" 208 | assert_geodataframe_equal(result.compute(), df.set_index("iso_a3")) 209 | 210 | # not writing the index 211 | basedir = tmp_path / "dataset" 212 | ddf.to_feather(basedir, write_index=False) 213 | result = dask_geopandas.read_feather(basedir) 214 | assert "hilbert_distance" not in result.columns 215 | assert result.index.name is None 216 | assert result.index.compute()[0] == 0 217 | 218 | 219 | def test_read_meta_is_empty(tmp_path, naturalearth_lowres): 220 | df = geopandas.read_file(naturalearth_lowres) 221 | 222 | basedir = tmp_path / "dataset" 223 | basedir.mkdir() 224 | df.iloc[:100].to_feather(basedir / "data.0.feather") 225 | df.iloc[100:].to_feather(basedir / "data.1.feather") 226 | 227 | result = dask_geopandas.read_feather(basedir) 228 | assert len(result._meta) == 0 229 | -------------------------------------------------------------------------------- /dask_geopandas/tests/io/test_backend_integration.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | 3 | import dask_geopandas 4 | 5 | import pytest 6 | from geopandas.testing import assert_geodataframe_equal 7 | 8 | try: 9 | import pyogrio # noqa: F401 10 | 11 | PYOGRIO = True 12 | except ImportError: 13 | PYOGRIO = False 14 | 15 | BACKENDS = ["arrow", "file", "parquet"] 16 | 17 | 18 | @pytest.fixture(params=BACKENDS) 19 | def backend(request): 20 | param = request.param 21 | if not PYOGRIO and param == "file": 22 | pytest.skip("Unable to import pyogrio for file backend") 23 | return param 24 | 25 | 26 | def from_arrow_backend(path, tmp_path, npartitions): 27 | df = geopandas.read_file(path) 28 | basedir = tmp_path / "dataset" 29 | basedir.mkdir() 30 | ddf = dask_geopandas.from_geopandas(df, npartitions=npartitions) 31 | for i, part in enumerate(ddf.partitions): 32 
| part.compute().to_feather(basedir / f"data.{i}.feather") 33 | return dask_geopandas.read_feather(basedir) 34 | 35 | 36 | def from_file_backend(path, tmp_path, npartitions): 37 | return dask_geopandas.read_file(path, npartitions=npartitions) 38 | 39 | 40 | def from_parquet_backend(path, tmp_path, npartitions): 41 | ddf = dask_geopandas.from_geopandas( 42 | geopandas.read_file(path), npartitions=npartitions 43 | ) 44 | basedir = tmp_path / "dataset" 45 | ddf.to_parquet(basedir) 46 | return dask_geopandas.read_parquet(basedir) 47 | 48 | 49 | def get_from_backend(backend, data_path, tmp_path, npartitions=4): 50 | if backend == "arrow": 51 | ddf = from_arrow_backend(data_path, tmp_path, npartitions) 52 | elif backend == "file": 53 | ddf = from_file_backend(data_path, tmp_path, npartitions) 54 | elif backend == "parquet": 55 | ddf = from_parquet_backend(data_path, tmp_path, npartitions) 56 | else: 57 | raise ValueError() 58 | return ddf 59 | 60 | 61 | def test_spatial_shuffle_integration(backend, naturalearth_lowres, tmp_path): 62 | ddf = get_from_backend(backend, naturalearth_lowres, tmp_path) 63 | new_idx = ddf.hilbert_distance() 64 | expected = ddf.compute().set_index(new_idx.compute()) 65 | 66 | result = ddf.spatial_shuffle() 67 | # Sort because the index is shuffled 68 | assert_geodataframe_equal(result.compute().sort_index(), expected.sort_index()) 69 | -------------------------------------------------------------------------------- /dask_geopandas/tests/io/test_file.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import pandas as pd 4 | 5 | import dask.dataframe as dd 6 | 7 | import geopandas 8 | from shapely.geometry import Polygon 9 | 10 | import dask_geopandas 11 | 12 | import pytest 13 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal 14 | from pandas.testing import assert_frame_equal, assert_series_equal 15 | 16 | pytest.importorskip("pyogrio") 17 | 18 | 19 | def test_read_file(naturalearth_lowres): 20 | path = naturalearth_lowres 21 | df = geopandas.read_file(path) 22 | result = dask_geopandas.read_file(path, npartitions=4) 23 | assert isinstance(result, dask_geopandas.GeoDataFrame) 24 | assert result.npartitions == 4 25 | assert result.crs == df.crs 26 | assert_geodataframe_equal(result.compute(), df) 27 | 28 | result = dask_geopandas.read_file(path, chunksize=100) 29 | assert isinstance(result, dask_geopandas.GeoDataFrame) 30 | assert result.npartitions == 2 31 | assert result.crs == df.crs 32 | assert_geodataframe_equal(result.compute(), df) 33 | 34 | msg = "Exactly one of npartitions and chunksize must be specified" 35 | with pytest.raises(ValueError, match=msg): 36 | dask_geopandas.read_file(path) 37 | with pytest.raises(ValueError, match=msg): 38 | dask_geopandas.read_file(path, npartitions=4, chunksize=100) 39 | 40 | 41 | def test_read_file_divisions(naturalearth_lowres): 42 | path = naturalearth_lowres 43 | result = dask_geopandas.read_file(path, npartitions=4) 44 | assert result.known_divisions 45 | assert result.index.divisions == (0, 45, 90, 135, 176) 46 | assert result.divisions == (0, 45, 90, 135, 176) 47 | 48 | 49 | def test_read_file_index(naturalearth_lowres): 50 | path = naturalearth_lowres 51 | df = geopandas.read_file(path) 52 | result = dask_geopandas.read_file(path, npartitions=4) 53 | assert (result.index.compute() == pd.RangeIndex(0, len(df))).all() 54 | 55 | 56 | def test_read_file_columns(naturalearth_lowres): 57 | path = naturalearth_lowres 58 | df = 
geopandas.read_file(path) 59 | 60 | # explicit column selection 61 | result = dask_geopandas.read_file( 62 | path, npartitions=4, columns=["pop_est", "geometry"] 63 | ) 64 | assert isinstance(result, dask_geopandas.GeoDataFrame) 65 | assert result.npartitions == 4 66 | assert result.crs == df.crs 67 | assert len(result.columns) == 2 68 | assert_geodataframe_equal(result.compute(), df[["pop_est", "geometry"]]) 69 | # only selecting non-geometry column 70 | result = dask_geopandas.read_file(path, npartitions=4, columns=["pop_est"]) 71 | assert type(result) == dd.DataFrame 72 | assert len(result.columns) == 1 73 | assert result.npartitions == 4 74 | assert_frame_equal(result.compute(), df[["pop_est"]]) 75 | 76 | # column selection through getitem 77 | ddf = dask_geopandas.read_file(path, npartitions=4) 78 | result = ddf[["pop_est", "geometry"]] 79 | assert isinstance(result, dask_geopandas.GeoDataFrame) 80 | assert result.npartitions == 4 81 | assert result.crs == df.crs 82 | assert_geodataframe_equal(result.compute(), df[["pop_est", "geometry"]]) 83 | 84 | # only select non-geometry column 85 | result = ddf["pop_est"] 86 | assert isinstance(result, dd.Series) 87 | assert_series_equal(result.compute(), df["pop_est"]) 88 | 89 | # only select geometry column 90 | result = ddf["geometry"] 91 | assert isinstance(result, dask_geopandas.GeoSeries) 92 | assert_geoseries_equal(result.compute(), df["geometry"]) 93 | 94 | 95 | def test_read_file_meta_is_empty(naturalearth_lowres): 96 | path = naturalearth_lowres 97 | result = dask_geopandas.read_file(path, npartitions=4) 98 | assert len(result._meta) == 0 99 | 100 | 101 | def test_read_file_layer(tmp_path): 102 | df_points = geopandas.GeoDataFrame( 103 | { 104 | "col": [1, 2, 3, 4], 105 | "geometry": geopandas.points_from_xy([1, 2, 3, 4], [2, 3, 4, 1]), 106 | }, 107 | crs=4326, 108 | ) 109 | df_polygons = geopandas.GeoDataFrame( 110 | { 111 | "col": [5, 6, 7, 8], 112 | "geometry": [ 113 | Polygon([(random.random(), random.random()) for i in range(3)]) 114 | for _ in range(4) 115 | ], 116 | }, 117 | crs=4326, 118 | ) 119 | 120 | path = tmp_path / "test_layers.gpkg" 121 | df_points.to_file(path, layer="points") 122 | df_polygons.to_file(path, layer="polygons") 123 | 124 | ddf_points = dask_geopandas.read_file(path, npartitions=2, layer="points") 125 | assert_geodataframe_equal(ddf_points.compute(), df_points) 126 | ddf_polygons = dask_geopandas.read_file(path, npartitions=2, layer="polygons") 127 | assert_geodataframe_equal(ddf_polygons.compute(), df_polygons) 128 | -------------------------------------------------------------------------------- /dask_geopandas/tests/io/test_parquet.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import dask.dataframe as dd 4 | 5 | import geopandas 6 | import shapely 7 | 8 | import dask_geopandas 9 | 10 | import pytest 11 | from geopandas.testing import assert_geodataframe_equal 12 | from pandas.testing import assert_series_equal 13 | 14 | pa = pytest.importorskip("pyarrow") 15 | 16 | 17 | pytestmark = pytest.mark.filterwarnings( 18 | "ignore:this is an initial implementation:UserWarning" 19 | ) 20 | 21 | 22 | def test_parquet_roundtrip(tmp_path, naturalearth_lowres): 23 | # basic roundtrip 24 | df = geopandas.read_file(naturalearth_lowres) 25 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 26 | 27 | basedir = tmp_path / "dataset" 28 | ddf.to_parquet(basedir) 29 | 30 | # each partition (4) is written as parquet file 31 | paths = 
list(basedir.glob("*.parquet")) 32 | assert len(paths) == 4 33 | 34 | # reading back gives identical GeoDataFrame 35 | result = dask_geopandas.read_parquet(basedir) 36 | assert result.npartitions == 4 37 | assert_geodataframe_equal(result.compute(), df) 38 | # reading back correctly sets the CRS in meta 39 | assert result.crs == df.crs 40 | # reading back also populates the spatial partitioning property 41 | assert result.spatial_partitions is not None 42 | assert result.spatial_partitions.crs == df.crs 43 | 44 | # the written dataset is also readable by plain geopandas 45 | result_gpd = geopandas.read_parquet(basedir) 46 | # the dataset written by dask has "__null_dask_index__" index column name 47 | result_gpd.index.name = None 48 | assert_geodataframe_equal(result_gpd, df) 49 | 50 | result_part0 = geopandas.read_parquet(basedir / "part.0.parquet") 51 | result_part0.index.name = None 52 | assert_geodataframe_equal(result_part0, df.iloc[:45]) 53 | 54 | 55 | def test_roundtrip_geometry_column_name(tmp_path, naturalearth_lowres): 56 | # basic roundtrip with different geometry column name 57 | df = geopandas.read_file(naturalearth_lowres) 58 | df = df.rename_geometry("geom") 59 | 60 | # geopandas -> dask-geopandas roundtrip 61 | path = tmp_path / "data.parquet" 62 | df.to_parquet(path) 63 | result = dask_geopandas.read_parquet(path) 64 | assert isinstance(result, dask_geopandas.GeoDataFrame) 65 | assert result.geometry.name == "geom" 66 | assert result.crs == df.crs 67 | assert result.spatial_partitions is not None 68 | assert_geodataframe_equal(result.compute(), df) 69 | 70 | # dask-geopandas -> dask-geopandas roundtrip 71 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 72 | assert ddf.geometry.name == "geom" 73 | basedir = tmp_path / "dataset" 74 | ddf.to_parquet(basedir) 75 | 76 | result = dask_geopandas.read_parquet(basedir) 77 | assert isinstance(result, dask_geopandas.GeoDataFrame) 78 | assert result.geometry.name == "geom" 79 | assert result.crs == df.crs 80 | assert result.spatial_partitions is not None 81 | assert_geodataframe_equal(result.compute(), df) 82 | 83 | 84 | def test_roundtrip_multiple_geometry_columns(tmp_path, naturalearth_lowres): 85 | # basic roundtrip with different geometry column name 86 | df = geopandas.read_file(naturalearth_lowres) 87 | df["geometry2"] = df.geometry.representative_point().to_crs("EPSG:3857") 88 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 89 | 90 | basedir = tmp_path / "dataset" 91 | ddf.to_parquet(basedir) 92 | 93 | result = dask_geopandas.read_parquet(basedir) 94 | assert isinstance(result, dask_geopandas.GeoDataFrame) 95 | assert result.crs == df.crs 96 | assert result.spatial_partitions is not None 97 | assert_geodataframe_equal(result.compute(), df) 98 | 99 | # ensure the geometry2 column is also considered as geometry in meta 100 | assert_series_equal(result.dtypes, df.dtypes) 101 | assert isinstance(result["geometry2"], dask_geopandas.GeoSeries) 102 | assert result["geometry"].crs == "EPSG:4326" 103 | assert result["geometry2"].crs == "EPSG:3857" 104 | 105 | 106 | def test_column_selection_push_down(tmp_path, naturalearth_lowres): 107 | # set up dataset 108 | df = geopandas.read_file(naturalearth_lowres) 109 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 110 | basedir = tmp_path / "dataset" 111 | ddf.to_parquet(basedir) 112 | 113 | ddf = dask_geopandas.read_parquet(basedir) 114 | 115 | # selecting columns including geometry column still gives GeoDataFrame 116 | ddf_subset = ddf[["pop_est", "geometry"]] 117 | 
assert type(ddf_subset) is dask_geopandas.GeoDataFrame 118 | # and also preserves the spatial partitioning information 119 | assert ddf_subset.spatial_partitions is not None 120 | 121 | # selecting a single non-geometry column on the dataframe should work 122 | s = ddf["pop_est"] 123 | assert type(s) is dd.Series 124 | assert s.max().compute() == df["pop_est"].max() 125 | 126 | 127 | def test_parquet_roundtrip_s3(s3_resource, s3_storage_options, naturalearth_lowres): 128 | fs, endpoint_url = s3_resource 129 | 130 | # basic roundtrip 131 | df = geopandas.read_file(naturalearth_lowres) 132 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 133 | 134 | uri = "s3://geopandas-test/dataset.parquet" 135 | ddf.to_parquet(uri, storage_options=s3_storage_options) 136 | 137 | # reading back gives identical GeoDataFrame 138 | result = dask_geopandas.read_parquet(uri, storage_options=s3_storage_options) 139 | assert result.npartitions == 4 140 | assert_geodataframe_equal(result.compute(), df) 141 | # reading back correctly sets the CRS in meta 142 | assert result.crs == df.crs 143 | # reading back also populates the spatial partitioning property 144 | assert result.spatial_partitions is not None 145 | 146 | 147 | def test_parquet_empty_partitions(tmp_path, naturalearth_lowres): 148 | df = geopandas.read_file(naturalearth_lowres) 149 | # Creating filtered dask dataframe with at least one empty partition 150 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 151 | ddf_filtered = ddf[ddf["pop_est"] > 1_000_000_000] 152 | assert (ddf_filtered.map_partitions(len).compute() == 0).any() 153 | 154 | basedir = tmp_path / "dataset" 155 | # TODO don't write metadata file as that fails with empty partitions on 156 | # inferring the schema 157 | ddf_filtered.to_parquet(basedir, write_metadata_file=False) 158 | 159 | result = dask_geopandas.read_parquet(basedir) 160 | assert_geodataframe_equal(result.compute(), df[df["pop_est"] > 1_000_000_000]) 161 | # once one partition has no spatial extent, we don't restore the spatial partitions 162 | assert result.spatial_partitions is None 163 | 164 | 165 | def test_parquet_partitions_with_all_missing_strings(tmp_path): 166 | df = geopandas.GeoDataFrame( 167 | {"col": ["a", "b", None, None]}, 168 | geometry=geopandas.points_from_xy([0, 1, 2, 3], [0, 1, 2, 3]), 169 | ) 170 | # Creating filtered dask dataframe with at least one empty partition 171 | ddf = dask_geopandas.from_geopandas(df, npartitions=2) 172 | 173 | basedir = tmp_path / "dataset" 174 | ddf.to_parquet(basedir) 175 | 176 | result = dask_geopandas.read_parquet(basedir) 177 | assert_geodataframe_equal(result.compute(), df) 178 | 179 | 180 | def test_parquet_empty_dataset(tmp_path): 181 | # ensure informative error message if there are no parts (otherwise 182 | # will raise in not finding any geo metadata) 183 | with pytest.raises(ValueError, match="No dataset parts discovered"): 184 | dask_geopandas.read_parquet(tmp_path / "data.*.parquet") 185 | 186 | 187 | @pytest.mark.parametrize("write_metadata_file", [True, False]) 188 | def test_parquet_partition_on(tmp_path, naturalearth_lowres, write_metadata_file): 189 | df = geopandas.read_file(naturalearth_lowres) 190 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 191 | 192 | # Writing a partitioned dataset based on one of the attribute columns 193 | basedir = tmp_path / "naturalearth_lowres_by_continent.parquet" 194 | ddf.to_parquet( 195 | basedir, partition_on="continent", write_metadata_file=write_metadata_file 196 | ) 197 | 198 | # Check for one 
of the partitions that the file is present and is correct 199 | n_files = 10 if write_metadata_file else 8 # 8 continents + 2 metadata files 200 | assert len(list(basedir.iterdir())) == n_files 201 | assert (basedir / "continent=Africa").exists() 202 | result_africa = geopandas.read_parquet(basedir / "continent=Africa") 203 | expected = df[df["continent"] == "Africa"].drop(columns=["continent"]) 204 | result_africa.index.name = None 205 | assert_geodataframe_equal(result_africa, expected) 206 | 207 | # Check roundtrip 208 | result = dask_geopandas.read_parquet(basedir) 209 | assert result.npartitions >= 8 210 | assert result.spatial_partitions is not None 211 | expected = df.copy() 212 | expected["continent"] = expected["continent"].astype("category") 213 | assert_geodataframe_equal(result.compute(), expected, check_like=True) 214 | 215 | 216 | def test_no_gather_spatial_partitions(tmp_path, naturalearth_lowres): 217 | # basic roundtrip 218 | df = geopandas.read_file(naturalearth_lowres) 219 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 220 | 221 | basedir = tmp_path / "dataset" 222 | ddf.to_parquet(basedir) 223 | 224 | result = dask_geopandas.read_parquet(basedir, gather_spatial_partitions=False) 225 | assert result.spatial_partitions is None 226 | assert result.crs == df.crs 227 | 228 | 229 | def test_read_parquet_default_crs(tmp_path): 230 | pyproj = pytest.importorskip("pyproj") 231 | import pyarrow.parquet as pq 232 | 233 | from geopandas.io.arrow import _geopandas_to_arrow 234 | 235 | gdf = geopandas.GeoDataFrame(geometry=[shapely.box(0, 0, 10, 10)]) 236 | gdf["other_geom"] = gdf["geometry"].centroid 237 | table = _geopandas_to_arrow(gdf) 238 | # update the geo metadata to strip 'crs' entry 239 | metadata = table.schema.metadata 240 | geo_metadata = json.loads(metadata[b"geo"].decode("utf-8")) 241 | del geo_metadata["columns"]["geometry"]["crs"] 242 | del geo_metadata["columns"]["other_geom"]["crs"] 243 | metadata.update({b"geo": json.dumps(geo_metadata).encode("utf-8")}) 244 | table = table.replace_schema_metadata(metadata) 245 | filename = str(tmp_path / "test.parquet") 246 | pq.write_table(table, filename) 247 | 248 | result = dask_geopandas.read_parquet(filename) 249 | assert result.crs.equals(pyproj.CRS("OGC:CRS84")) 250 | assert result["other_geom"].crs.equals(pyproj.CRS("OGC:CRS84")) 251 | 252 | 253 | def test_read_parquet_meta_is_empty(tmp_path, naturalearth_lowres): 254 | # basic roundtrip 255 | df = geopandas.read_file(naturalearth_lowres) 256 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 257 | 258 | basedir = tmp_path / "dataset" 259 | ddf.to_parquet(basedir) 260 | 261 | result = dask_geopandas.read_parquet(basedir) 262 | assert len(result._meta) == 0 263 | -------------------------------------------------------------------------------- /dask_geopandas/tests/test_clip.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | 3 | import dask_geopandas 4 | 5 | from .test_core import geodf_points # noqa: F401 6 | 7 | import pytest 8 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal 9 | 10 | 11 | def test_clip(naturalearth_lowres, naturalearth_cities): 12 | cities = geopandas.read_file(naturalearth_cities) 13 | dask_obj = dask_geopandas.from_geopandas(cities, npartitions=4) 14 | dask_obj.calculate_spatial_partitions() 15 | mask = geopandas.read_file(naturalearth_lowres).query("continent == 'Africa'") 16 | expected = geopandas.clip(cities, mask) 17 | clipped = 
dask_geopandas.clip(dask_obj, mask) 18 | 19 | assert isinstance(clipped.spatial_partitions, geopandas.GeoSeries) 20 | 21 | result = clipped.compute() 22 | assert_geodataframe_equal(expected.sort_index(), result.sort_index()) 23 | 24 | 25 | def test_clip_no_spatial_partitions(geodf_points): # noqa: F811 26 | dask_obj = dask_geopandas.from_geopandas(geodf_points, npartitions=2) 27 | mask = geodf_points.iloc[:1] 28 | mask["geometry"] = mask["geometry"].buffer(2) 29 | expected = geodf_points.iloc[:2] 30 | result = dask_geopandas.clip(dask_obj, mask).compute() 31 | assert_geodataframe_equal(expected, result) 32 | 33 | 34 | def test_clip_dask_mask(geodf_points): # noqa: F811 35 | dask_obj = dask_geopandas.from_geopandas(geodf_points, npartitions=2) 36 | mask = dask_geopandas.from_geopandas(geodf_points.iloc[:1], npartitions=1) 37 | with pytest.raises( 38 | NotImplementedError, match=r"Mask cannot be a Dask GeoDataFrame or GeoSeries." 39 | ): 40 | dask_geopandas.clip(dask_obj, mask) 41 | 42 | 43 | def test_clip_geoseries(geodf_points): # noqa: F811 44 | dask_obj = dask_geopandas.from_geopandas(geodf_points, npartitions=2) 45 | dask_obj.calculate_spatial_partitions() 46 | mask = geodf_points.iloc[:1] 47 | mask["geometry"] = mask["geometry"].buffer(2) 48 | expected = geopandas.clip(geodf_points.geometry, mask) 49 | result = dask_geopandas.clip(dask_obj.geometry, mask).compute() 50 | assert_geoseries_equal(expected, result) 51 | -------------------------------------------------------------------------------- /dask_geopandas/tests/test_distributed.py: -------------------------------------------------------------------------------- 1 | from packaging.version import Version 2 | 3 | import geopandas 4 | 5 | import dask_geopandas 6 | 7 | import pytest 8 | from geopandas.testing import assert_geodataframe_equal 9 | 10 | distributed = pytest.importorskip("distributed") 11 | 12 | 13 | from distributed import Client, LocalCluster 14 | 15 | 16 | @pytest.mark.skipif( 17 | Version(distributed.__version__) < Version("2024.6.0"), 18 | reason="distributed < 2024.6 has a wrong assertion", 19 | # https://github.com/dask/distributed/pull/8667 20 | ) 21 | @pytest.mark.skipif( 22 | Version(distributed.__version__) < Version("0.13"), 23 | reason="geopandas < 0.13 does not implement sorting geometries", 24 | ) 25 | def test_spatial_shuffle(naturalearth_cities): 26 | df_points = geopandas.read_file(naturalearth_cities) 27 | 28 | with LocalCluster(n_workers=1) as cluster: 29 | with Client(cluster): 30 | ddf_points = dask_geopandas.from_geopandas(df_points, npartitions=4) 31 | 32 | ddf_result = ddf_points.spatial_shuffle( 33 | by="hilbert", calculate_partitions=False 34 | ) 35 | result = ddf_result.compute() 36 | 37 | expected = df_points.sort_values("geometry").reset_index(drop=True) 38 | assert_geodataframe_equal(result.reset_index(drop=True), expected) 39 | -------------------------------------------------------------------------------- /dask_geopandas/tests/test_geohash.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | import geopandas 5 | from shapely.geometry import LineString, Point, Polygon 6 | from shapely.wkt import loads 7 | 8 | from dask_geopandas import from_geopandas 9 | from dask_geopandas.geohash import _calculate_mid_points 10 | 11 | import pytest 12 | from numpy.testing import assert_array_equal 13 | from pandas.testing import assert_index_equal 14 | 15 | 16 | @pytest.fixture 17 | def geoseries_points(): 18 | p1 = 
Point(1, 2) 19 | p2 = Point(2, 3) 20 | p3 = Point(3, 4) 21 | p4 = Point(4, 1) 22 | return geopandas.GeoSeries([p1, p2, p3, p4]) 23 | 24 | 25 | @pytest.fixture 26 | def geoseries_lines(): 27 | l1 = LineString([(0, 0), (0, 1), (1, 1)]) 28 | l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)]) 29 | return geopandas.GeoSeries([l1, l2] * 2) 30 | 31 | 32 | @pytest.fixture() 33 | def geoseries_polygons(): 34 | t1 = Polygon([(0, 3.5), (7, 2.4), (1, 0.1)]) 35 | t2 = Polygon([(0, 0), (1, 1), (0, 1)]) 36 | sq1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) 37 | sq2 = Polygon([(0, 0), (1, 0), (1, 2), (0, 2)]) 38 | return geopandas.GeoSeries([t1, t2, sq1, sq2]) 39 | 40 | 41 | def geohash_dask(geoseries): 42 | pygeohash = pytest.importorskip("pygeohash") 43 | 44 | p = 12 45 | as_string = True 46 | bounds = geoseries.bounds.to_numpy() 47 | x_mids, y_mids = _calculate_mid_points(bounds) 48 | 49 | geohash_vec = np.vectorize(pygeohash.encode) 50 | # Encode mid points of geometries using geohash 51 | expected = geohash_vec(y_mids, x_mids, p) 52 | 53 | ddf = from_geopandas(geoseries, npartitions=1) 54 | result = ddf.geohash(precision=p, as_string=as_string).compute() 55 | 56 | assert_array_equal(np.array(result), expected) 57 | assert isinstance(result, pd.Series) 58 | assert_index_equal(ddf.index.compute(), result.index) 59 | 60 | 61 | def test_geohash_points(geoseries_points): 62 | geohash_dask(geoseries_points) 63 | 64 | 65 | def test_geohash_lines(geoseries_lines): 66 | geohash_dask(geoseries_lines) 67 | 68 | 69 | def test_geohash_polygons(geoseries_polygons): 70 | geohash_dask(geoseries_polygons) 71 | 72 | 73 | def test_geohash_range(geoseries_points): 74 | 75 | ddf = from_geopandas(geoseries_points, npartitions=1) 76 | 77 | with pytest.raises(ValueError): 78 | ddf.geohash(precision=0, as_string=False) 79 | ddf.geohash(precision=12, as_string=False) 80 | 81 | 82 | def test_world(naturalearth_lowres): 83 | # world without Fiji 84 | geohash_dask(geopandas.read_file(naturalearth_lowres).iloc[1:]) 85 | 86 | 87 | @pytest.mark.parametrize( 88 | "empty", 89 | [ 90 | None, 91 | loads("POLYGON EMPTY"), 92 | ], 93 | ) 94 | def test_empty(geoseries_polygons, empty): 95 | s = geoseries_polygons 96 | s.iloc[-1] = empty 97 | dask_obj = from_geopandas(s, npartitions=2) 98 | with pytest.raises( 99 | ValueError, match="cannot be computed on a GeoSeries with empty" 100 | ): 101 | dask_obj.geohash().compute() 102 | -------------------------------------------------------------------------------- /dask_geopandas/tests/test_hilbert_distance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | import geopandas 5 | from shapely.geometry import LineString, Point, Polygon 6 | from shapely.wkt import loads 7 | 8 | from dask_geopandas import from_geopandas 9 | from dask_geopandas.hilbert_distance import ( 10 | _continuous_to_discrete_coords, 11 | _hilbert_distance, 12 | ) 13 | 14 | import pytest 15 | from pandas.testing import assert_index_equal, assert_series_equal 16 | 17 | 18 | def test_hilbert_distance(): 19 | # test the actual Hilbert Code algorithm against some hardcoded values 20 | geoms = geopandas.GeoSeries.from_wkt( 21 | [ 22 | "POINT (0 0)", 23 | "POINT (1 1)", 24 | "POINT (1 0)", 25 | "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", 26 | ] 27 | ) 28 | result = _hilbert_distance(geoms, total_bounds=(0, 0, 1, 1), level=2) 29 | assert result.tolist() == [0, 10, 15, 2] 30 | 31 | result = _hilbert_distance(geoms, total_bounds=(0, 0, 1, 1), level=3) 32 | 
assert result.tolist() == [0, 42, 63, 10] 33 | 34 | result = _hilbert_distance(geoms, total_bounds=(0, 0, 1, 1), level=16) 35 | assert result.tolist() == [0, 2863311530, 4294967295, 715827882] 36 | 37 | 38 | @pytest.fixture 39 | def geoseries_points(): 40 | p1 = Point(1, 2) 41 | p2 = Point(2, 3) 42 | p3 = Point(3, 4) 43 | p4 = Point(4, 1) 44 | return geopandas.GeoSeries([p1, p2, p3, p4]) 45 | 46 | 47 | @pytest.fixture 48 | def geoseries_lines(): 49 | l1 = LineString([(0, 0), (0, 1), (1, 1)]) 50 | l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)]) 51 | return geopandas.GeoSeries([l1, l2] * 2) 52 | 53 | 54 | @pytest.fixture() 55 | def geoseries_polygons(): 56 | t1 = Polygon([(0, 3.5), (7, 2.4), (1, 0.1)]) 57 | t2 = Polygon([(0, 0), (1, 1), (0, 1)]) 58 | sq1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) 59 | sq2 = Polygon([(0, 0), (1, 0), (1, 2), (0, 2)]) 60 | return geopandas.GeoSeries([t1, t2, sq1, sq2]) 61 | 62 | 63 | def hilbert_distance_dask(geoseries, level=16): 64 | pytest.importorskip("hilbertcurve") 65 | from hilbertcurve.hilbertcurve import HilbertCurve 66 | 67 | bounds = geoseries.bounds.to_numpy() 68 | total_bounds = geoseries.total_bounds 69 | x, y = _continuous_to_discrete_coords( 70 | bounds, level=level, total_bounds=total_bounds 71 | ) 72 | coords = np.stack((x, y), axis=1) 73 | 74 | hilbert_curve = HilbertCurve(p=level, n=2) 75 | expected = hilbert_curve.distances_from_points(coords) 76 | 77 | ddf = from_geopandas(geoseries, npartitions=1) 78 | result = ddf.hilbert_distance(level=level).compute() 79 | 80 | assert list(result) == expected 81 | assert isinstance(result, pd.Series) 82 | assert_index_equal(ddf.index.compute(), result.index) 83 | 84 | 85 | @pytest.mark.parametrize("level", [2, 10, 15, 16]) 86 | def test_hilbert_distance_points(geoseries_points, level): 87 | hilbert_distance_dask(geoseries_points, level) 88 | 89 | 90 | @pytest.mark.parametrize("level", [2, 10, 15, 16]) 91 | def test_hilbert_distance_lines(geoseries_lines, level): 92 | hilbert_distance_dask(geoseries_lines, level) 93 | 94 | 95 | @pytest.mark.parametrize("level", [2, 10, 15, 16]) 96 | def test_hilbert_distance_polygons(geoseries_polygons, level): 97 | hilbert_distance_dask(geoseries_polygons, level) 98 | 99 | 100 | def test_hilbert_distance_level(geoseries_points): 101 | ddf = from_geopandas(geoseries_points, npartitions=1) 102 | with pytest.raises(ValueError): 103 | ddf.hilbert_distance(level=20).compute() 104 | 105 | 106 | def test_specified_total_bounds(geoseries_polygons): 107 | ddf = from_geopandas(geoseries_polygons, npartitions=2) 108 | 109 | result = ddf.hilbert_distance(total_bounds=geoseries_polygons.total_bounds) 110 | expected = ddf.hilbert_distance() 111 | assert_series_equal(result.compute(), expected.compute()) 112 | 113 | 114 | def test_total_bounds_from_partitions(geoseries_polygons): 115 | ddf = from_geopandas(geoseries_polygons, npartitions=2) 116 | expected = ddf.hilbert_distance().compute() 117 | 118 | ddf.calculate_spatial_partitions() 119 | result = ddf.hilbert_distance().compute() 120 | assert_series_equal(result, expected) 121 | 122 | 123 | def test_world(naturalearth_lowres): 124 | # world without Fiji 125 | hilbert_distance_dask(geopandas.read_file(naturalearth_lowres).iloc[1:]) 126 | 127 | 128 | @pytest.mark.parametrize( 129 | "empty", 130 | [ 131 | None, 132 | loads("POLYGON EMPTY"), 133 | ], 134 | ) 135 | def test_empty(geoseries_polygons, empty): 136 | s = geoseries_polygons 137 | s.iloc[-1] = empty 138 | dask_obj = from_geopandas(s, npartitions=2) 139 | with 
pytest.raises( 140 | ValueError, match="cannot be computed on a GeoSeries with empty" 141 | ): 142 | dask_obj.hilbert_distance().compute() 143 | -------------------------------------------------------------------------------- /dask_geopandas/tests/test_morton_distance.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | import geopandas 4 | from shapely.geometry import LineString, Point, Polygon 5 | from shapely.wkt import loads 6 | 7 | from dask_geopandas import from_geopandas 8 | from dask_geopandas.hilbert_distance import _continuous_to_discrete_coords 9 | 10 | import pytest 11 | from pandas.testing import assert_index_equal, assert_series_equal 12 | 13 | 14 | @pytest.fixture 15 | def geoseries_points(): 16 | p1 = Point(1, 2) 17 | p2 = Point(2, 3) 18 | p3 = Point(3, 4) 19 | p4 = Point(4, 1) 20 | return geopandas.GeoSeries([p1, p2, p3, p4]) 21 | 22 | 23 | @pytest.fixture 24 | def geoseries_lines(): 25 | l1 = LineString([(0, 0), (0, 1), (1, 1)]) 26 | l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)]) 27 | return geopandas.GeoSeries([l1, l2] * 2) 28 | 29 | 30 | @pytest.fixture() 31 | def geoseries_polygons(): 32 | t1 = Polygon([(0, 3.5), (7, 2.4), (1, 0.1)]) 33 | t2 = Polygon([(0, 0), (1, 1), (0, 1)]) 34 | sq1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) 35 | sq2 = Polygon([(0, 0), (1, 0), (1, 2), (0, 2)]) 36 | return geopandas.GeoSeries([t1, t2, sq1, sq2]) 37 | 38 | 39 | def morton_distance_dask(geoseries): 40 | # https://github.com/trevorprater/pymorton 41 | pymorton = pytest.importorskip("pymorton") 42 | 43 | bounds = geoseries.bounds.to_numpy() 44 | total_bounds = geoseries.total_bounds 45 | x_coords, y_coords = _continuous_to_discrete_coords( 46 | bounds, level=16, total_bounds=total_bounds 47 | ) 48 | 49 | ddf = from_geopandas(geoseries, npartitions=1) 50 | result = ddf.morton_distance().compute() 51 | 52 | expected = [] 53 | 54 | for i in range(len(x_coords)): 55 | x = int(x_coords[i]) 56 | y = int(y_coords[i]) 57 | expected.append(pymorton.interleave(x, y)) 58 | 59 | assert list(result) == expected 60 | assert isinstance(result, pd.Series) 61 | assert_index_equal(ddf.index.compute(), result.index) 62 | 63 | 64 | def test_morton_distance_points(geoseries_points): 65 | morton_distance_dask(geoseries_points) 66 | 67 | 68 | def test_morton_distance_lines(geoseries_lines): 69 | morton_distance_dask(geoseries_lines) 70 | 71 | 72 | def test_morton_distance_polygons(geoseries_polygons): 73 | morton_distance_dask(geoseries_polygons) 74 | 75 | 76 | def test_specified_total_bounds(geoseries_polygons): 77 | ddf = from_geopandas(geoseries_polygons, npartitions=2) 78 | 79 | result = ddf.morton_distance(total_bounds=geoseries_polygons.total_bounds) 80 | expected = ddf.morton_distance() 81 | assert_series_equal(result.compute(), expected.compute()) 82 | 83 | 84 | def test_total_bounds_from_partitions(geoseries_polygons): 85 | ddf = from_geopandas(geoseries_polygons, npartitions=2) 86 | expected = ddf.morton_distance().compute() 87 | 88 | ddf.calculate_spatial_partitions() 89 | result = ddf.morton_distance().compute() 90 | assert_series_equal(result, expected) 91 | 92 | 93 | def test_world(naturalearth_lowres): 94 | # world without Fiji 95 | morton_distance_dask(geopandas.read_file(naturalearth_lowres).iloc[1:]) 96 | 97 | 98 | @pytest.mark.parametrize( 99 | "empty", 100 | [ 101 | None, 102 | loads("POLYGON EMPTY"), 103 | ], 104 | ) 105 | def test_empty(geoseries_polygons, empty): 106 | s = geoseries_polygons 107 | s.iloc[-1] = empty 108 
| dask_obj = from_geopandas(s, npartitions=2) 109 | with pytest.raises( 110 | ValueError, match="cannot be computed on a GeoSeries with empty" 111 | ): 112 | dask_obj.morton_distance().compute() 113 | -------------------------------------------------------------------------------- /dask_geopandas/tests/test_sjoin.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | import shapely 3 | 4 | import dask_geopandas 5 | 6 | import pytest 7 | from geopandas.testing import assert_geodataframe_equal 8 | 9 | 10 | def test_sjoin_dask_geopandas(naturalearth_lowres, naturalearth_cities): 11 | df_points = geopandas.read_file(naturalearth_cities) 12 | ddf_points = dask_geopandas.from_geopandas(df_points, npartitions=4) 13 | 14 | df_polygons = geopandas.read_file(naturalearth_lowres) 15 | ddf_polygons = dask_geopandas.from_geopandas(df_polygons, npartitions=4) 16 | 17 | expected = geopandas.sjoin(df_points, df_polygons, predicate="within", how="inner") 18 | expected = expected.sort_index() 19 | 20 | # dask / geopandas 21 | result = dask_geopandas.sjoin( 22 | ddf_points, df_polygons, predicate="within", how="inner" 23 | ) 24 | assert_geodataframe_equal(expected, result.compute().sort_index()) 25 | 26 | # geopandas / dask 27 | result = dask_geopandas.sjoin( 28 | df_points, ddf_polygons, predicate="within", how="inner" 29 | ) 30 | assert_geodataframe_equal(expected, result.compute().sort_index()) 31 | 32 | # dask / dask 33 | result = dask_geopandas.sjoin( 34 | ddf_points, ddf_polygons, predicate="within", how="inner" 35 | ) 36 | assert_geodataframe_equal(expected, result.compute().sort_index()) 37 | 38 | # with spatial_partitions 39 | ddf_points.calculate_spatial_partitions() 40 | ddf_polygons.calculate_spatial_partitions() 41 | result = dask_geopandas.sjoin( 42 | ddf_points, ddf_polygons, predicate="within", how="inner" 43 | ) 44 | assert isinstance(result.spatial_partitions, geopandas.GeoSeries) 45 | assert_geodataframe_equal(expected, result.compute().sort_index()) 46 | 47 | # check warning 48 | with pytest.warns(FutureWarning, match="The `op` parameter is deprecated"): 49 | dask_geopandas.sjoin(df_points, ddf_polygons, op="within", how="inner") 50 | 51 | 52 | def test_no_value_error(): 53 | # https://github.com/geopandas/dask-geopandas/issues/303 54 | shape = shapely.geometry.box(-74.5, -74.0, 4.5, 5.0) 55 | df = dask_geopandas.from_geopandas( 56 | geopandas.GeoDataFrame(geometry=[shape]), npartitions=1 57 | ).spatial_shuffle() 58 | # no TypeError 59 | df.sjoin(df).compute() 60 | -------------------------------------------------------------------------------- /dask_geopandas/tests/test_spatial_partitioning.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | 3 | import dask_geopandas 4 | 5 | import pytest 6 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal 7 | 8 | 9 | def test_propagate_on_geometry_access(naturalearth_lowres): 10 | # ensure the spatial_partitioning information is preserved in GeoSeries 11 | df = geopandas.read_file(naturalearth_lowres) 12 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 13 | ddf.calculate_spatial_partitions() 14 | spatial_partitions = ddf.spatial_partitions.copy() 15 | 16 | # geometry attribute 17 | gs = ddf.geometry 18 | assert gs.spatial_partitions is not None 19 | assert_geoseries_equal(gs.spatial_partitions, spatial_partitions) 20 | 21 | # column access 22 | gs = ddf["geometry"] 23 | assert gs.spatial_partitions is 
not None 24 | assert_geoseries_equal(gs.spatial_partitions, spatial_partitions) 25 | 26 | # subset geodataframe 27 | subset = ddf[["continent", "geometry"]] 28 | assert subset.spatial_partitions is not None 29 | assert_geoseries_equal(subset.spatial_partitions, spatial_partitions) 30 | 31 | 32 | @pytest.mark.parametrize( 33 | "attr", ["boundary", "centroid", "convex_hull", "envelope", "exterior"] 34 | ) 35 | def test_propagate_geoseries_properties(naturalearth_lowres, attr): 36 | df = geopandas.read_file(naturalearth_lowres) 37 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 38 | ddf.calculate_spatial_partitions() 39 | spatial_partitions = ddf.spatial_partitions.copy() 40 | 41 | result = getattr(ddf, attr) 42 | assert result.spatial_partitions is not None 43 | assert_geoseries_equal(result.spatial_partitions, spatial_partitions) 44 | assert_geoseries_equal(result.compute(), getattr(df, attr)) 45 | 46 | 47 | def test_cx(naturalearth_lowres): 48 | # test cx using spatial partitions 49 | df = geopandas.read_file(naturalearth_lowres) 50 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 51 | ddf.calculate_spatial_partitions() 52 | 53 | subset = ddf.cx[-180:-70, 0:-80] 54 | assert len(subset) == 8 55 | expected = df.cx[-180:-70, 0:-80] 56 | assert_geodataframe_equal(subset.compute(), expected) 57 | 58 | # empty slice 59 | subset = ddf.cx[-200:-190, 300:400] 60 | assert len(subset) == 0 61 | expected = df.cx[-200:-190, 300:400] 62 | assert_geodataframe_equal(subset.compute(), expected) 63 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | geopandas>=0.10 2 | numpydoc==1.1.0 3 | sphinx-book-theme 4 | myst-nb 5 | myst-parser 6 | sphinx_copybutton 7 | sphinx 8 | matplotlib 9 | -------------------------------------------------------------------------------- /doc/source/_static/binary_geo-difference.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/_static/binary_geo-intersection.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/_static/binary_geo-symm_diff.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/_static/binary_geo-union.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/_static/binary_op-01.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/_static/binary_op-02.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/_static/binary_op-03.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* colors */ 2 | 3 | :root { 4 | --pst-color-primary: 19, 156, 90; 5 | --pst-color-active-navigation: 19, 156, 90; 6 | --pst-color-h2: var(--color-text-base); 7 | --pst-color-link: 19, 156, 90; 8 | } -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | .. _reference: 2 | 3 | API Reference 4 | ============= 5 | 6 | The API Reference provides an overview of all public objects, functions and methods implemented in Dask-GeoPandas. 7 | 8 | .. warning:: 9 | Some docstrings are taken directly from GeoPandas or Dask. Some inconsistencies with the Dask-GeoPandas version may exist. 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | GeoSeries 15 | GeoDataFrame 16 | Input/output 17 | Tools 18 | 19 | -------------------------------------------------------------------------------- /doc/source/changelog.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../../CHANGELOG.md 2 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | import dask_geopandas # noqa 17 | 18 | autodoc_mock_imports = [ 19 | "shapely", 20 | "dask", 21 | ] 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = "dask-geopandas" 26 | copyright = "2020-, GeoPandas development team" 27 | author = "GeoPandas development team" 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = version = dask_geopandas.__version__ 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | "sphinx.ext.autodoc", 40 | "numpydoc", 41 | "sphinx.ext.autosummary", 42 | "myst_nb", 43 | "sphinx_copybutton", 44 | ] 45 | 46 | numpydoc_show_class_members = False 47 | autosummary_generate = True 48 | jupyter_execute_notebooks = "auto" 49 | execution_excludepatterns = [ 50 | "basic-intro.ipynb", 51 | "dissolve.ipynb", 52 | "spatial-partitioning.ipynb", 53 | ] 54 | 55 | 56 | def setup(app): 57 | app.add_css_file("custom.css") # may also be an URL 58 | 59 | 60 | # Add any paths that contain templates here, relative to this directory. 61 | templates_path = ["_templates"] 62 | 63 | # List of patterns, relative to source directory, that match files and 64 | # directories to ignore when looking for source files. 65 | # This pattern also affects html_static_path and html_extra_path. 66 | exclude_patterns = [] 67 | 68 | 69 | # -- Options for HTML output ------------------------------------------------- 70 | 71 | # The theme to use for HTML and HTML Help pages. See the documentation for 72 | # a list of builtin themes. 73 | # 74 | html_theme = "sphinx_book_theme" 75 | 76 | html_theme_options = { 77 | "repository_url": "https://github.com/geopandas/dask-geopandas", 78 | "use_repository_button": True, 79 | "use_fullscreen_button": False, 80 | } 81 | html_title = "dask-geopandas" 82 | # Add any paths that contain custom static files (such as style sheets) here, 83 | # relative to this directory. They are copied after the builtin static files, 84 | # so a file named "default.css" will overwrite the builtin "default.css". 85 | html_static_path = ["_static"] 86 | -------------------------------------------------------------------------------- /doc/source/docs/reference/geodataframe.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | GeoDataFrame 3 | ============ 4 | .. 
currentmodule:: dask_geopandas 5 | 6 | A ``GeoDataFrame`` is a tabular data structure that contains a column 7 | which stores geometries (a ``GeoSeries``). 8 | 9 | Constructor 10 | ----------- 11 | .. autosummary:: 12 | :toctree: api/ 13 | 14 | GeoDataFrame 15 | 16 | Serialization / IO / conversion 17 | ------------------------------- 18 | 19 | .. autosummary:: 20 | :toctree: api/ 21 | 22 | GeoDataFrame.to_dask_dataframe 23 | GeoDataFrame.to_feather 24 | GeoDataFrame.to_parquet 25 | GeoDataFrame.to_wkb 26 | GeoDataFrame.to_wkt 27 | 28 | Projection handling 29 | ------------------- 30 | 31 | .. autosummary:: 32 | :toctree: api/ 33 | 34 | GeoDataFrame.crs 35 | GeoDataFrame.set_crs 36 | GeoDataFrame.to_crs 37 | 38 | Active geometry handling 39 | ------------------------ 40 | 41 | .. autosummary:: 42 | :toctree: api/ 43 | 44 | GeoDataFrame.set_geometry 45 | GeoDataFrame.rename_geometry 46 | 47 | Aggregating and exploding 48 | ------------------------- 49 | 50 | .. autosummary:: 51 | :toctree: api/ 52 | 53 | GeoDataFrame.explode 54 | GeoDataFrame.dissolve 55 | 56 | Spatial joins 57 | ------------- 58 | 59 | .. autosummary:: 60 | :toctree: api/ 61 | 62 | GeoDataFrame.sjoin 63 | 64 | Overlay operations 65 | ------------------ 66 | 67 | .. autosummary:: 68 | :toctree: api/ 69 | 70 | GeoDataFrame.clip 71 | 72 | Indexing 73 | -------- 74 | 75 | .. autosummary:: 76 | :toctree: api/ 77 | 78 | GeoDataFrame.cx 79 | 80 | Spatial partitioning 81 | -------------------- 82 | 83 | .. autosummary:: 84 | :toctree: api/ 85 | 86 | GeoDataFrame.spatial_shuffle 87 | 88 | 89 | All dask ``DataFrame`` methods are also available, although they may 90 | not operate in a meaningful way on the ``geometry`` column. All methods 91 | listed in `GeoSeries `__ work directly on an active geometry column of GeoDataFrame. 92 | 93 | -------------------------------------------------------------------------------- /doc/source/docs/reference/geoseries.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | GeoSeries 3 | ========= 4 | .. currentmodule:: dask_geopandas 5 | 6 | Constructor 7 | ----------- 8 | .. autosummary:: 9 | :toctree: api/ 10 | 11 | GeoSeries 12 | 13 | General methods and attributes 14 | ------------------------------ 15 | 16 | .. autosummary:: 17 | :toctree: api/ 18 | 19 | GeoSeries.area 20 | GeoSeries.boundary 21 | GeoSeries.bounds 22 | GeoSeries.total_bounds 23 | GeoSeries.length 24 | GeoSeries.geom_type 25 | GeoSeries.distance 26 | GeoSeries.representative_point 27 | GeoSeries.exterior 28 | GeoSeries.interiors 29 | GeoSeries.x 30 | GeoSeries.y 31 | GeoSeries.z 32 | 33 | Unary predicates 34 | ---------------- 35 | 36 | .. autosummary:: 37 | :toctree: api/ 38 | 39 | GeoSeries.is_empty 40 | GeoSeries.is_ring 41 | GeoSeries.is_simple 42 | GeoSeries.is_valid 43 | GeoSeries.has_z 44 | 45 | 46 | Binary Predicates 47 | ----------------- 48 | 49 | .. autosummary:: 50 | :toctree: api/ 51 | 52 | GeoSeries.contains 53 | GeoSeries.crosses 54 | GeoSeries.disjoint 55 | GeoSeries.geom_equals 56 | GeoSeries.geom_equals_exact 57 | GeoSeries.intersects 58 | GeoSeries.overlaps 59 | GeoSeries.touches 60 | GeoSeries.within 61 | GeoSeries.covers 62 | GeoSeries.covered_by 63 | 64 | 65 | Set-theoretic Methods 66 | --------------------- 67 | 68 | .. 
autosummary:: 69 | :toctree: api/ 70 | 71 | GeoSeries.difference 72 | GeoSeries.intersection 73 | GeoSeries.symmetric_difference 74 | GeoSeries.union 75 | 76 | Constructive Methods and Attributes 77 | ----------------------------------- 78 | 79 | .. autosummary:: 80 | :toctree: api/ 81 | 82 | GeoSeries.buffer 83 | GeoSeries.boundary 84 | GeoSeries.centroid 85 | GeoSeries.convex_hull 86 | GeoSeries.envelope 87 | GeoSeries.simplify 88 | 89 | Affine transformations 90 | ---------------------- 91 | 92 | .. autosummary:: 93 | :toctree: api/ 94 | 95 | GeoSeries.affine_transform 96 | GeoSeries.rotate 97 | GeoSeries.scale 98 | GeoSeries.skew 99 | GeoSeries.translate 100 | 101 | Aggregating and exploding 102 | ------------------------- 103 | 104 | .. autosummary:: 105 | :toctree: api/ 106 | 107 | GeoSeries.unary_union 108 | GeoSeries.explode 109 | 110 | Serialization / IO / conversion 111 | ------------------------------- 112 | 113 | .. autosummary:: 114 | :toctree: api/ 115 | 116 | GeoSeries.to_wkb 117 | GeoSeries.to_wkt 118 | 119 | Projection handling 120 | ------------------- 121 | 122 | .. autosummary:: 123 | :toctree: api/ 124 | 125 | GeoSeries.crs 126 | GeoSeries.set_crs 127 | GeoSeries.to_crs 128 | 129 | Missing values 130 | -------------- 131 | 132 | .. autosummary:: 133 | :toctree: api/ 134 | 135 | GeoSeries.fillna 136 | GeoSeries.isna 137 | 138 | Overlay operations 139 | ------------------ 140 | 141 | .. autosummary:: 142 | :toctree: api/ 143 | 144 | GeoSeries.clip 145 | 146 | Indexing 147 | -------- 148 | 149 | .. autosummary:: 150 | :toctree: api/ 151 | 152 | GeoSeries.cx 153 | 154 | Spatial partitioning 155 | -------------------- 156 | 157 | .. autosummary:: 158 | :toctree: api/ 159 | 160 | GeoSeries.calculate_spatial_partitions 161 | GeoSeries.hilbert_distance 162 | GeoSeries.morton_distance 163 | GeoSeries.geohash 164 | 165 | 166 | 167 | Methods of dask ``Series`` objects are also available, although not 168 | all are applicable to geometric objects and some may return a 169 | ``Series`` rather than a ``GeoSeries`` result when appropriate. The methods 170 | ``isna()`` and ``fillna()`` have been 171 | implemented specifically for ``GeoSeries`` and are expected to work 172 | correctly. 173 | -------------------------------------------------------------------------------- /doc/source/docs/reference/io.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Input/output 3 | ============ 4 | .. currentmodule:: dask_geopandas 5 | 6 | GeoPandas 7 | ---------------- 8 | .. autosummary:: 9 | :toctree: api/ 10 | 11 | from_dask_dataframe 12 | from_geopandas 13 | 14 | GIS files 15 | --------- 16 | 17 | .. autosummary:: 18 | :toctree: api/ 19 | 20 | read_file 21 | 22 | Parquet 23 | ------- 24 | .. autosummary:: 25 | :toctree: api/ 26 | 27 | read_parquet 28 | GeoDataFrame.to_parquet 29 | 30 | Feather 31 | ------- 32 | .. autosummary:: 33 | :toctree: api/ 34 | 35 | read_feather 36 | GeoDataFrame.to_feather 37 | -------------------------------------------------------------------------------- /doc/source/docs/reference/tools.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Tools 3 | ===== 4 | .. currentmodule:: dask_geopandas 5 | 6 | .. 
autosummary:: 7 | :toctree: api/ 8 | 9 | sjoin 10 | clip 11 | points_from_xy 12 | from_wkt 13 | from_wkb -------------------------------------------------------------------------------- /doc/source/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | The relationship between Dask-GeoPandas and GeoPandas is the same as the relationship 4 | between `dask.dataframe` and `pandas`. We recommend checking the 5 | [Dask documentation](https://docs.dask.org/en/latest/dataframe.html) to better understand how 6 | DataFrames are scaled before diving into Dask-GeoPandas. 7 | 8 | ## Dask-GeoPandas basics 9 | 10 | Given a GeoPandas dataframe 11 | 12 | ```py 13 | import geopandas 14 | df = geopandas.read_file('...') 15 | ``` 16 | 17 | We can repartition it into a Dask-GeoPandas dataframe: 18 | 19 | ```py 20 | import dask_geopandas 21 | ddf = dask_geopandas.from_geopandas(df, npartitions=4) 22 | ``` 23 | 24 | By default, this repartitions the data naively by rows. However, you can 25 | also provide spatial partitioning to take advantage of the spatial structure of 26 | the GeoDataFrame. 27 | 28 | ```py 29 | ddf = ddf.spatial_shuffle() 30 | ``` 31 | 32 | The familiar spatial attributes and methods of GeoPandas are also available 33 | and will be computed in parallel: 34 | 35 | ```py 36 | ddf.geometry.area.compute() 37 | ddf.within(polygon) 38 | ``` 39 | 40 | Additionally, if you have a distributed dask.dataframe you can pass columns of 41 | x-y points to the ``set_geometry`` method. 42 | 43 | ```py 44 | import dask.dataframe as dd 45 | import dask_geopandas 46 | 47 | ddf = dd.read_csv('...') 48 | 49 | ddf = ddf.set_geometry( 50 | dask_geopandas.points_from_xy(ddf, 'longitude', 'latitude') 51 | ) 52 | ``` 53 | 54 | Writing files (and reading back) is currently supported for the Parquet and Feather file 55 | formats. 56 | 57 | ```py 58 | ddf.to_parquet("path/to/dir/") 59 | ddf = dask_geopandas.read_parquet("path/to/dir/") 60 | ``` 61 | 62 | Traditional GIS file formats can be read into partitioned GeoDataFrame 63 | (requires `pyogrio`) but not written. 64 | 65 | ```py 66 | ddf = dask_geopandas.read_file("file.gpkg", npartitions=4) 67 | ``` 68 | -------------------------------------------------------------------------------- /doc/source/guide.md: -------------------------------------------------------------------------------- 1 | # User Guide 2 | 3 | The User Guide covers different parts of basic usage of Dask-GeoPandas. Each page focuses on a single topic and outlines how it is implemented in Dask-GeoPandas, with reproducible examples. You can also check the documentation of [GeoPandas](https://geopandas.org) 4 | and [Dask](https://dask.org). 5 | 6 | ```{toctree} 7 | --- 8 | maxdepth: 2 9 | --- 10 | Basic introduction 11 | Spatial partitioning 12 | Aggregation with dissolve 13 | ``` 14 | -------------------------------------------------------------------------------- /doc/source/guide/basic-intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# The basic introduction to Dask-GeoPandas\n", 8 | "\n", 9 | "This notebook illustrates the basic API of Dask-GeoPandas and provides a basic timing comparison between operations on `geopandas.GeoDataFrame` and parallel `dask_geopandas.GeoDataFrame`." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import geopandas\n", 20 | "\n", 21 | "import dask_geopandas" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Creating a parallelized `dask_geopandas.GeoDataFrame`\n", 29 | "\n", 30 | "There are many ways to create a parallelized `dask_geopandas.GeoDataFrame`. If your initial data fits in memory, you can create it from a `geopandas.GeoDataFrame` using the `from_geopandas` function:" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df = geopandas.read_file(geopandas.datasets.get_path(\"naturalearth_lowres\"))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/html": [ 50 | "
\n", 51 | "\n", 64 | "\n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
pop_est continent name iso_a3 gdp_md_est geometry
0 920938 Oceania Fiji FJI 8374.0 MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
1 53950935 Africa Tanzania TZA 150600.0 POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
2 603253 Africa W. Sahara ESH 906.5 POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
3 35623680 North America Canada CAN 1674000.0 MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
4 326625791 North America United States of America USA 18560000.0 MULTIPOLYGON (((-122.84000 49.00000, -120.0000...
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " pop_est continent name iso_a3 gdp_md_est \\\n", 128 | "0 920938 Oceania Fiji FJI 8374.0 \n", 129 | "1 53950935 Africa Tanzania TZA 150600.0 \n", 130 | "2 603253 Africa W. Sahara ESH 906.5 \n", 131 | "3 35623680 North America Canada CAN 1674000.0 \n", 132 | "4 326625791 North America United States of America USA 18560000.0 \n", 133 | "\n", 134 | " geometry \n", 135 | "0 MULTIPOLYGON (((180.00000 -16.06713, 180.00000... \n", 136 | "1 POLYGON ((33.90371 -0.95000, 34.07262 -1.05982... \n", 137 | "2 POLYGON ((-8.66559 27.65643, -8.66512 27.58948... \n", 138 | "3 MULTIPOLYGON (((-122.84000 49.00000, -122.9742... \n", 139 | "4 MULTIPOLYGON (((-122.84000 49.00000, -120.0000... " 140 | ] 141 | }, 142 | "execution_count": 3, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "df.head()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "When creating a `dask_geopandas.GeoDataFrame` we have to specify how to partition it, e.g. using the `npartitions` argument to split it into N equal chunks." 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 4, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "ddf = dask_geopandas.from_geopandas(df, npartitions=4)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 5, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/html": [ 175 | "
Dask DataFrame Structure:
\n", 176 | "
\n", 177 | "\n", 190 | "\n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | "
pop_est continent name iso_a3 gdp_md_est geometry
npartitions=4
0 int64 object object object float64 geometry
45 ... ... ... ... ... ...
90 ... ... ... ... ... ...
135 ... ... ... ... ... ...
176 ... ... ... ... ... ...
\n", 259 | "
\n", 260 | "
Dask Name: from_pandas, 4 tasks
" 261 | ], 262 | "text/plain": [ 263 | "" 264 | ] 265 | }, 266 | "execution_count": 5, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "ddf" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "Computation on a non-geometry column:" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 6, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "Africa 51\n", 291 | "Asia 47\n", 292 | "Europe 39\n", 293 | "North America 18\n", 294 | "South America 13\n", 295 | "Oceania 7\n", 296 | "Seven seas (open ocean) 1\n", 297 | "Antarctica 1\n", 298 | "Name: continent, dtype: int64" 299 | ] 300 | }, 301 | "execution_count": 6, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "ddf.continent.value_counts().compute()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "And calling one of the geopandas-specific methods or attributes:" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 7, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/plain": [ 325 | "Dask Series Structure:\n", 326 | "npartitions=4\n", 327 | "0 float64\n", 328 | "45 ...\n", 329 | "90 ...\n", 330 | "135 ...\n", 331 | "176 ...\n", 332 | "dtype: float64\n", 333 | "Dask Name: getitem, 12 tasks" 334 | ] 335 | }, 336 | "execution_count": 7, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "ddf.geometry.area" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "As you can see, without calling `compute()`, the resulting Series does not yet contain any values." 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 8, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "0 1.639511\n", 361 | "1 76.301964\n", 362 | "2 8.603984\n", 363 | "3 1712.995228\n", 364 | "4 1122.281921\n", 365 | " ... 
\n", 366 | "172 8.604719\n", 367 | "173 1.479321\n", 368 | "174 1.231641\n", 369 | "175 0.639000\n", 370 | "176 51.196106\n", 371 | "Length: 177, dtype: float64" 372 | ] 373 | }, 374 | "execution_count": 8, 375 | "metadata": {}, 376 | "output_type": "execute_result" 377 | } 378 | ], 379 | "source": [ 380 | "ddf.geometry.area.compute()" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "## Timing comparison: Point-in-polygon with million points" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "The GeoDataFrame used above is a bit small to see any benefit from parallelization using dask (as the overhead of the task scheduler is larger than the actual operation on such a tiny dataframe), so let's create a bigger point GeoSeries:" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 9, 400 | "metadata": {}, 401 | "outputs": [], 402 | "source": [ 403 | "N = 10_000_000" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 10, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "points = geopandas.GeoDataFrame(geometry=geopandas.points_from_xy(np.random.randn(N),np.random.randn(N)))" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "And creating the dask-geopandas version of this series:" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 11, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "dpoints = dask_geopandas.from_geopandas(points, npartitions=16)" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "A single polygon for which we will check if the points are located within this polygon:" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 12, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "import shapely.geometry\n", 445 | "box = shapely.geometry.box(0, 0, 1, 1)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "The `within` operation will result in a boolean Series:" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 13, 458 | "metadata": {}, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/plain": [ 463 | "Dask Series Structure:\n", 464 | "npartitions=16\n", 465 | "0 bool\n", 466 | "625000 ...\n", 467 | " ... 
\n", 468 | "9375000 ...\n", 469 | "9999999 ...\n", 470 | "dtype: bool\n", 471 | "Dask Name: within, 32 tasks" 472 | ] 473 | }, 474 | "execution_count": 13, 475 | "metadata": {}, 476 | "output_type": "execute_result" 477 | } 478 | ], 479 | "source": [ 480 | "dpoints.within(box)" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "The relative number of the points within the polygon:" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 14, 493 | "metadata": {}, 494 | "outputs": [ 495 | { 496 | "data": { 497 | "text/plain": [ 498 | "0.1162862" 499 | ] 500 | }, 501 | "execution_count": 14, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "(dpoints.within(box).sum() / len(dpoints)).compute()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "Let's compare the time it takes to compute this:" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 15, 520 | "metadata": {}, 521 | "outputs": [ 522 | { 523 | "name": "stdout", 524 | "output_type": "stream", 525 | "text": [ 526 | "460 ms ± 30.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 527 | ] 528 | } 529 | ], 530 | "source": [ 531 | "%timeit points.within(box)" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 17, 537 | "metadata": {}, 538 | "outputs": [ 539 | { 540 | "name": "stdout", 541 | "output_type": "stream", 542 | "text": [ 543 | "169 ms ± 39.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" 544 | ] 545 | } 546 | ], 547 | "source": [ 548 | "%timeit dpoints.within(box).compute()" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "This is run on a laptop with 4 physical cores, and giving roughly a 3x speed-up using multithreading." 556 | ] 557 | } 558 | ], 559 | "metadata": { 560 | "kernelspec": { 561 | "display_name": "Python 3", 562 | "language": "python", 563 | "name": "python3" 564 | }, 565 | "language_info": { 566 | "codemirror_mode": { 567 | "name": "ipython", 568 | "version": 3 569 | }, 570 | "file_extension": ".py", 571 | "mimetype": "text/x-python", 572 | "name": "python", 573 | "nbconvert_exporter": "python", 574 | "pygments_lexer": "ipython3", 575 | "version": "3.8.5" 576 | } 577 | }, 578 | "nbformat": 4, 579 | "nbformat_minor": 4 580 | } 581 | -------------------------------------------------------------------------------- /doc/source/index.md: -------------------------------------------------------------------------------- 1 | # dask-geopandas documentation 2 | 3 | Parallel GeoPandas with Dask 4 | 5 | Dask-GeoPandas is a project merging the geospatial capabilities of [GeoPandas](https://geopandas.org) 6 | and scalability of [Dask](https://dask.org). GeoPandas is an open source project designed to make working with geospatial data in Python easier. GeoPandas extends the datatypes used by pandas to allow spatial operations on geometric types. 7 | Dask provides advanced parallelism and distributed out-of-core computation with a dask.dataframe module designed to scale 8 | pandas. Since GeoPandas is an extension to the pandas DataFrame, the same way Dask scales pandas can also be applied to GeoPandas. 9 | 10 | This project is a bridge between Dask and GeoPandas and offers geospatial capabilities of GeoPandas backed by Dask. 11 | 12 | ## Install 13 | 14 | Dask-GeoPandas depends on Dask and GeoPandas. 
In addition, it also requires 15 | Shapely >= 2.0. We recommend installing via `conda` or `mamba` 16 | from the `conda-forge` channel but you can also install it from PyPI. 17 | 18 | ```sh 19 | conda install dask-geopandas -c conda-forge 20 | ``` 21 | 22 | ```sh 23 | pip install dask-geopandas 24 | ``` 25 | 26 | For more details, see the [installation instructions](installation). 27 | 28 | ## Example 29 | 30 | As with `dask.dataframe` and `pandas`, the API of `dask_geopandas` mirrors the one of `geopandas`. 31 | 32 | ```py 33 | import geopandas 34 | import dask_geopandas 35 | 36 | df = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres")) 37 | dask_df = dask_geopandas.from_geopandas(df, npartitions=4) 38 | 39 | dask_df.geometry.area.compute() 40 | ``` 41 | 42 | ## When should I use Dask-GeoPandas? 43 | 44 | Dask-GeoPandas is useful when dealing with large GeoDataFrames that either do not comfortably fit in memory or require expensive computation that can be easily parallelised. Note that using Dask-GeoPandas is not always faster than using GeoPandas as there is an unavoidable overhead in task scheduling and transfer of data between threads and processes, but in other cases, your performance gains can be almost linear with more threads. 45 | 46 | ## Useful links 47 | 48 | [Source Repository (GitHub)](https://github.com/geopandas/dask-geopandas) | [Issues & Ideas](https://github.com/geopandas/dask-geopandas/issues) | [Gitter (chat)](https://gitter.im/geopandas/dask-geopandas) 49 | 50 | ```{toctree} 51 | --- 52 | maxdepth: 2 53 | caption: Documentation 54 | hidden: true 55 | --- 56 | installation 57 | getting_started 58 | guide 59 | parquet 60 | api 61 | changelog 62 | GitHub 63 | ``` 64 | -------------------------------------------------------------------------------- /doc/source/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | This package depends on GeoPandas and Dask. In addition, it also requires 4 | Shapely >= 2.0. 5 | 6 | GeoPandas is written in pure Python, but has several dependencies written in C (GEOS, GDAL, PROJ). Those base C libraries can sometimes be a challenge to install. Therefore, we advise you to closely follow the [recommendations](https://geopandas.org/en/stable/getting_started/install.html) to avoid installation problems. 7 | 8 | ## Easy way 9 | 10 | The best way to install Dask-GeoPandas is using `conda` or `mamba` and `conda-forge` channel: 11 | 12 | ```sh 13 | conda install -c conda-forge dask-geopandas 14 | ``` 15 | 16 | ## pip 17 | 18 | You can install Dask-GeoPandas with `pip` from PyPI but make sure that your environment contains 19 | properly installed GeoPandas (note that Dask-GeoPandas does not use `fiona` which therefore doesn't 20 | have to be installed). See the [GeoPandas installation instructions](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip) for details. 
21 | 22 | ```sh 23 | pip install dask-geopandas 24 | ``` 25 | 26 | ## Fresh environment 27 | 28 | One way to install all required dependencies is to use the `conda` package manager to 29 | create a new environment: 30 | 31 | ```shell 32 | conda create -n geo_env 33 | conda activate geo_env 34 | conda config --env --add channels conda-forge 35 | conda config --env --set channel_priority strict 36 | conda install dask-geopandas 37 | ``` 38 | -------------------------------------------------------------------------------- /doc/source/parquet.md: -------------------------------------------------------------------------------- 1 | # Reading and Writing Apache Parquet 2 | 3 | Similar to dask-dataframe, dask-geopandas supports reading and writing Apache Parquet files. 4 | 5 | See the [Dask DataFrame](https://docs.dask.org/en/stable/dataframe-parquet.html#dataframe-parquet) 6 | and [Geopandas](https://geopandas.org/en/stable/docs/user_guide/io.html#apache-parquet-and-feather-file-formats) documentation 7 | for more on Apache Parquet. 8 | 9 | ## Partitioning 10 | 11 | As outlined in [Spatial partitioning in Dask-GeoPandas](guide/spatial-partitioning.ipynb), dask-geopandas can spatially partition datasets. These partitions are 12 | persisted in the parquet files. 13 | 14 | By default, reading these spatial partitions requires opening every file and checking its spatial extent. This can be a 15 | bit slow if the parquet dataset is made up of many individual partitions. To disable loading the spatial partitions, 16 | specify ``gather_spatial_partitions=False`` when reading the file: 17 | 18 | 19 | ```py 20 | ddf = dask_geopandas.read_parquet("...", gather_spatial_partitions=False) 21 | ddf.spatial_partitions # None 22 | ``` 23 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "dask-geopandas" 10 | dynamic = ["version"] 11 | authors = [ 12 | { name = "Julia Signell", email = "jsignell@gmail.com" } 13 | ] 14 | maintainers = [ 15 | { name = "GeoPandas contributors" } 16 | ] 17 | license = { text = "BSD 3-Clause" } 18 | description = "Parallel GeoPandas with Dask" 19 | readme = "README.rst" 20 | keywords = ["dask", "geopandas", "spatial", "distributed", "cluster"] 21 | classifiers = [ 22 | "Development Status :: 5 - Production/Stable", 23 | "Intended Audience :: Developers", 24 | "Intended Audience :: Science/Research", 25 | "License :: OSI Approved :: BSD License", 26 | "Operating System :: OS Independent", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3 :: Only", 29 | "Topic :: Scientific/Engineering", 30 | "Topic :: Scientific/Engineering :: GIS", 31 | "Topic :: System :: Distributed Computing", 32 | ] 33 | requires-python = ">=3.10" 34 | dependencies = [ 35 | "geopandas>=0.14.3", 36 | "shapely>=2.0", 37 | "dask[dataframe]>=2025.1.0", 38 | "packaging", 39 | ] 40 | 41 | [project.optional-dependencies] 42 | test = [ 43 | "pytest", 44 | ] 45 | 46 | [project.urls] 47 | Home = "https://geopandas.org" 48 | Documentation = "https://dask-geopandas.readthedocs.io/" 49 | Repository = "https://github.com/geopandas/dask-geopandas" 50 | "Issue Tracker" = "https://github.com/geopandas/dask-geopandas/issues" 51 | 52 | 53 | [tool.black] 54 | line-length = 88 55 | 56 | [tool.ruff] 57 | line-length = 88 58 | 
extend-exclude = ["doc/*", "versioneer.py", "dask_geopandas/_version.py"] 59 | 60 | [tool.ruff.lint] 61 | select = [ 62 | # pyflakes 63 | "F", 64 | # pycodestyle 65 | "E", 66 | "W", 67 | # pyupgrade 68 | # "UP", 69 | # flake8-bugbear 70 | "B", 71 | # flake8-debugger 72 | "T10", 73 | # flake8-simplify 74 | # "SIM", 75 | # pylint 76 | "PLC", 77 | "PLE", 78 | "PLR", 79 | "PLW", 80 | # misc lints 81 | "PIE", 82 | # implicit string concatenation 83 | "ISC", 84 | # type-checking imports 85 | "TCH", 86 | # comprehensions 87 | "C4", 88 | # Ruff-specific rules 89 | "RUF", 90 | # isort 91 | "I", 92 | ] 93 | 94 | ignore = [ 95 | ### Intentionally disabled 96 | # module level import not at top of file 97 | "E402", 98 | # do not assign a lambda expression, use a def 99 | "E731", 100 | # mutable-argument-default 101 | "B006", 102 | # unused-loop-control-variable 103 | "B007", 104 | # get-attr-with-constant 105 | "B009", 106 | # Only works with python >=3.10 107 | "B905", 108 | # dict literals 109 | "C408", 110 | # Too many arguments to function call 111 | "PLR0913", 112 | # Too many returns 113 | "PLR0911", 114 | # Too many branches 115 | "PLR0912", 116 | # Too many statements 117 | "PLR0915", 118 | # Magic number 119 | "PLR2004", 120 | # Redefined loop name 121 | "PLW2901", 122 | # Global statements are discouraged 123 | "PLW0603", 124 | # compare-to-empty-string 125 | "PLC1901", 126 | 127 | ### Additional checks that don't pass yet 128 | # Useless statement 129 | "B018", 130 | # Within an except clause, raise exceptions with ... 131 | "B904", 132 | # Consider `elif` instead of `else` then `if` to remove indentation level 133 | "PLR5501", 134 | # collection-literal-concatenation 135 | "RUF005", 136 | # Mutable class attributes should be annotated with `typing.ClassVar`, 137 | "RUF012" 138 | ] 139 | 140 | [tool.ruff.lint.per-file-ignores] 141 | "dask_geopandas/__init__.py" = ["F401", "I"] 142 | 143 | [tool.ruff.lint.isort] 144 | extra-standard-library = ["packaging"] 145 | 146 | section-order = [ 147 | "future", 148 | "standard-library", 149 | "third-party", 150 | "dask", 151 | "geo", 152 | "first-party", 153 | "local-folder", 154 | "testing" 155 | ] 156 | 157 | [tool.ruff.lint.isort.sections] 158 | "dask" = ["dask"] 159 | "geo" = ["geopandas", "shapely", "pyproj"] 160 | "testing" = ["pytest", "pandas.testing", "numpy.testing", "geopandas.tests", "geopandas.testing"] 161 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-22.04 4 | tools: 5 | python: "3.11" 6 | formats: [] 7 | 8 | sphinx: 9 | configuration: doc/source/conf.py 10 | 11 | python: 12 | install: 13 | - requirements: doc/requirements.txt 14 | - method: pip 15 | path: . 16 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | hilbertcurve 4 | pygeohash 5 | pymorton 6 | pytest 7 | bokeh 8 | distributed -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # See the docstring in versioneer.py for instructions. Note that you must 2 | # re-run 'versioneer.py setup' after changing this section, and commit the 3 | # resulting files. 
4 | 5 | [versioneer] 6 | VCS = git 7 | style = pep440 8 | versionfile_source = dask_geopandas/_version.py 9 | versionfile_build = dask_geopandas/_version.py 10 | tag_prefix = v 11 | parentdir_prefix = dask_geopandas- 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from setuptools import setup 5 | 6 | # ensure the current directory is on sys.path so versioneer can be imported 7 | # when pip uses PEP 517/518 build rules. 8 | # https://github.com/python-versioneer/python-versioneer/issues/193 9 | sys.path.append(os.path.dirname(__file__)) 10 | 11 | import versioneer 12 | 13 | 14 | # see pyproject.toml for static project metadata 15 | setup( 16 | name="dask-geopandas", # need by GitHub dependency graph 17 | version=versioneer.get_version(), 18 | cmdclass=versioneer.get_cmdclass(), 19 | ) 20 | --------------------------------------------------------------------------------