├── docs ├── .gitignore ├── source │ ├── .gitignore │ ├── index.rst │ ├── pandas.rst │ ├── conf.py │ └── pyarrow.rst ├── requirements.txt └── README.md ├── geoarrow-types ├── tests │ ├── test_package.py │ ├── test_constants.py │ ├── test_crs.py │ ├── test_type_spec.py │ └── test_type_pyarrow.py ├── src │ └── geoarrow │ │ └── types │ │ ├── __init__.py │ │ ├── constants.py │ │ └── crs.py └── pyproject.toml ├── geoarrow-metapackage ├── README.md └── pyproject.toml ├── .pre-commit-config.yaml ├── geoarrow-pandas ├── tests │ ├── test_geoarrow_pandas_geopandas.py │ ├── test_geoarrow_pandas_suite.py │ └── test_geoarrow_pandas.py ├── src │ └── geoarrow │ │ └── pandas │ │ └── __init__.py └── pyproject.toml ├── .github └── workflows │ ├── pre-commit.yaml │ ├── docs.yaml │ └── test.yaml ├── geoarrow-pyarrow ├── pyproject.toml ├── tests │ ├── test_geopandas.py │ ├── test_dataset.py │ ├── test_io.py │ └── test_pyarrow.py └── src │ └── geoarrow │ └── pyarrow │ ├── __init__.py │ ├── _scalar.py │ ├── _kernel.py │ ├── _type.py │ └── _array.py ├── .gitignore ├── README.md ├── LICENSE └── README.ipynb /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/source/.gitignore: -------------------------------------------------------------------------------- 1 | *_generated.rst 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pydata-sphinx-theme 2 | sphinx 3 | -------------------------------------------------------------------------------- /geoarrow-types/tests/test_package.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import geoarrow.types as gat 4 | 5 | 6 | def test_version(): 7 | assert re.match(r"^[0-9]+\.[0-9]+", gat.__version__) 8 | 
-------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. include:: README_generated.rst 3 | 4 | Contents 5 | -------- 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | geoarrow-pyarrow 11 | geoarrow-pandas 12 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Building geoarrow documentation 3 | 4 | ```bash 5 | cd docs 6 | 7 | # copy the readme into rst so that we can include it from sphinx 8 | pandoc ../README.md --from markdown --to rst -s -o source/README_generated.rst 9 | 10 | # Run sphinx to generate the main site 11 | sphinx-build source _build/html 12 | ``` 13 | -------------------------------------------------------------------------------- /docs/source/pandas.rst: -------------------------------------------------------------------------------- 1 | 2 | geoarrow-pandas 3 | =============== 4 | 5 | .. automodule:: geoarrow.pandas 6 | 7 | .. autoclass:: GeoArrowAccessor 8 | :members: 9 | 10 | .. autoclass:: GeoArrowExtensionDtype 11 | :members: 12 | 13 | .. autoclass:: GeoArrowExtensionArray 14 | :members: 15 | 16 | .. autoclass:: GeoArrowExtensionScalar 17 | :members: 18 | -------------------------------------------------------------------------------- /geoarrow-metapackage/README.md: -------------------------------------------------------------------------------- 1 | # geoarrow 2 | 3 | This is a metapackage for the geoarrow namespace. 4 | 5 | The `geoarrow` Python libraries are distributed with [namespace packaging](https://packaging.python.org/en/latest/guides/packaging-namespace-packages/), meaning that each python package `geoarrow-[submodule-name]` (imported as `geoarrow.[submodule-name]`) can be published to PyPI independently. 
6 | 7 | In order to obtain relevant modules, you should install them from PyPI directly, e.g.: 8 | 9 | ``` 10 | pip install geoarrow-pyarrow 11 | ``` 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.3.0 5 | hooks: 6 | - id: check-yaml 7 | - id: end-of-file-fixer 8 | - id: trailing-whitespace 9 | - repo: https://github.com/astral-sh/ruff-pre-commit 10 | rev: v0.1.5 11 | hooks: 12 | - id: ruff 13 | args: [ --fix ] 14 | - id: ruff-format 15 | - repo: https://github.com/codespell-project/codespell 16 | rev: v2.2.5 17 | hooks: 18 | - id: codespell 19 | types_or: [rst, markdown] 20 | additional_dependencies: [tomli] 21 | -------------------------------------------------------------------------------- /geoarrow-pandas/tests/test_geoarrow_pandas_geopandas.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pandas as pd 3 | 4 | 5 | pytest.importorskip("geopandas") 6 | 7 | 8 | def test_scalar_to_shapely(): 9 | series = pd.Series(["POINT (0 1)", "POINT (1 2)"]) 10 | extension_series = series.geoarrow.as_geoarrow() 11 | assert extension_series[0].to_shapely().wkt == "POINT (0 1)" 12 | 13 | 14 | def test_accessor_to_geopandas(): 15 | series = pd.Series(["POINT (0 1)", "POINT (1 2)"]) 16 | geoseries = series.geoarrow.to_geopandas() 17 | assert len(geoseries) == 2 18 | assert geoseries[0].wkt == "POINT (0 1)" 19 | -------------------------------------------------------------------------------- /geoarrow-pandas/src/geoarrow/pandas/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains pandas integration for the geoarrow Python bindings. 
3 | Importing this package will register pyarrow extension types and 4 | register the ``geoarrow`` accessor on ``pandas.Series`` objects. 5 | 6 | Examples 7 | -------- 8 | 9 | >>> import geoarrow.pandas as _ 10 | """ 11 | 12 | from geoarrow.types._version import __version__, __version_tuple__ # NOQA: F401 13 | 14 | from .lib import ( 15 | GeoArrowAccessor, 16 | GeoArrowExtensionDtype, 17 | GeoArrowExtensionArray, 18 | GeoArrowExtensionScalar, 19 | ) 20 | 21 | __all__ = [ 22 | "GeoArrowAccessor", 23 | "GeoArrowExtensionDtype", 24 | "GeoArrowExtensionArray", 25 | "GeoArrowExtensionScalar", 26 | ] 27 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | pre-commit: 16 | name: "pre-commit" 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | with: 21 | fetch-depth: 0 22 | persist-credentials: false 23 | - uses: actions/setup-python@v4 24 | - name: pre-commit (cache) 25 | uses: actions/cache@v3 26 | with: 27 | path: ~/.cache/pre-commit 28 | key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 29 | - name: pre-commit (--all-files) 30 | run: | 31 | python -m pip install pre-commit 32 | pre-commit run --show-diff-on-failure --color=always --all-files 33 | -------------------------------------------------------------------------------- /geoarrow-metapackage/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "geoarrow" 7 | version = "0.1.0" 8 | description = 'Metapackage for geoarrow namespace.' 
9 | readme = "README.md" 10 | requires-python = ">=3.7" 11 | license = "Apache-2.0" 12 | keywords = [] 13 | authors = [ 14 | { name = "Kyle Barron", email = "kyle@developmentseed.org" }, 15 | ] 16 | classifiers = [ 17 | "Programming Language :: Python", 18 | "Programming Language :: Python :: 3.7", 19 | "Programming Language :: Python :: 3.8", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | ] 24 | dependencies = [] 25 | 26 | [project.urls] 27 | Documentation = "https://geoarrow.org" 28 | Issues = "https://github.com/geoarrow/geoarrow-python/issues" 29 | Source = "https://github.com/geoarrow/geoarrow-python" 30 | -------------------------------------------------------------------------------- /geoarrow-types/src/geoarrow/types/__init__.py: -------------------------------------------------------------------------------- 1 | from geoarrow.types._version import __version__, __version_tuple__ # NOQA: F401 2 | 3 | from geoarrow.types.constants import ( 4 | Encoding, 5 | GeometryType, 6 | Dimensions, 7 | CoordType, 8 | EdgeType, 9 | ) 10 | 11 | from geoarrow.types.crs import Crs, OGC_CRS84 12 | 13 | from geoarrow.types.type_spec import ( 14 | TypeSpec, 15 | type_spec, 16 | wkb, 17 | large_wkb, 18 | wkt, 19 | large_wkt, 20 | wkb_view, 21 | wkt_view, 22 | box, 23 | point, 24 | linestring, 25 | polygon, 26 | multipoint, 27 | multilinestring, 28 | multipolygon, 29 | geoarrow, 30 | ) 31 | 32 | 33 | __all__ = [ 34 | "Encoding", 35 | "GeometryType", 36 | "Dimensions", 37 | "CoordType", 38 | "EdgeType", 39 | "Crs", 40 | "OGC_CRS84", 41 | "TypeSpec", 42 | "type_spec", 43 | "wkb", 44 | "large_wkb", 45 | "wkt", 46 | "large_wkt", 47 | "wkb_view", 48 | "wkt_view", 49 | "geoarrow", 50 | "box", 51 | "point", 52 | "linestring", 53 | "polygon", 54 | "multipoint", 55 | "multilinestring", 56 | "multipolygon", 57 | ] 58 | 
-------------------------------------------------------------------------------- /geoarrow-types/pyproject.toml: -------------------------------------------------------------------------------- 1 | 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | [project] 20 | name = "geoarrow-types" 21 | dynamic = ["version"] 22 | description = "" 23 | authors = [{name = "Dewey Dunnington", email = "dewey@dunnington.ca"}] 24 | license = {text = "Apache-2.0"} 25 | requires-python = ">=3.7" 26 | dependencies = [] 27 | 28 | [project.optional-dependencies] 29 | test = ["pytest", "pyarrow >= 12", "numpy"] 30 | 31 | [project.urls] 32 | homepage = "https://geoarrow.org" 33 | repository = "https://github.com/geoarrow/geoarrow-python" 34 | 35 | [build-system] 36 | requires = [ 37 | "setuptools >= 61.0.0", 38 | "setuptools-scm" 39 | ] 40 | build-backend = "setuptools.build_meta" 41 | 42 | [tool.setuptools_scm] 43 | root = ".." 
44 | tag_regex = "geoarrow-types-([0-9.]+)" 45 | git_describe_command = "git describe --long --match='geoarrow-types-*'" 46 | version_file = "src/geoarrow/types/_version.py" 47 | 48 | [tool.pytest.ini_options] 49 | consider_namespace_packages = true 50 | -------------------------------------------------------------------------------- /geoarrow-pandas/pyproject.toml: -------------------------------------------------------------------------------- 1 | 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | [project] 20 | name = "geoarrow-pandas" 21 | dynamic = ["version"] 22 | description = "" 23 | authors = [{name = "Dewey Dunnington", email = "dewey@dunnington.ca"}] 24 | license = {text = "Apache-2.0"} 25 | requires-python = ">=3.8" 26 | dependencies = ["geoarrow-pyarrow", "pandas", "pyarrow"] 27 | 28 | [project.optional-dependencies] 29 | test = ["pytest", "numpy", "geopandas"] 30 | 31 | [project.urls] 32 | homepage = "https://arrow.apache.org" 33 | repository = "https://github.com/geoarrow/geoarrow-python" 34 | 35 | [build-system] 36 | requires = [ 37 | "setuptools >= 61.0.0", 38 | "setuptools-scm" 39 | ] 40 | build-backend = "setuptools.build_meta" 41 | 42 | [tool.setuptools_scm] 43 | root = ".." 44 | tag_regex = "geoarrow-pandas-([0-9.]+)" 45 | git_describe_command = "git describe --long --match='geoarrow-pandas-*'" 46 | version_file = "src/geoarrow/pandas/_version.py" 47 | 48 | [tool.pytest.ini_options] 49 | consider_namespace_packages = true 50 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/pyproject.toml: -------------------------------------------------------------------------------- 1 | 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. 
See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | [project] 20 | name = "geoarrow-pyarrow" 21 | dynamic = ["version"] 22 | description = "" 23 | authors = [{name = "Dewey Dunnington", email = "dewey@dunnington.ca"}] 24 | license = {text = "Apache-2.0"} 25 | requires-python = ">=3.8" 26 | dependencies = ["pyarrow >= 14.0.2", "geoarrow-types >= 0.3.0", "geoarrow-c >= 0.3.0"] 27 | 28 | [project.optional-dependencies] 29 | test = ["pytest", "pandas", "numpy", "geopandas", "pyogrio", "pyproj"] 30 | 31 | [project.urls] 32 | homepage = "https://geoarrow.org" 33 | repository = "https://github.com/geoarrow/geoarrow-python" 34 | 35 | [build-system] 36 | requires = [ 37 | "setuptools >= 61.0.0", 38 | "setuptools-scm" 39 | ] 40 | build-backend = "setuptools.build_meta" 41 | 42 | [tool.setuptools_scm] 43 | root = ".." 44 | tag_regex = "geoarrow-pyarrow-([0-9]+.[0-9]+.[0-9]+)" 45 | git_describe_command = "git describe --long --match='geoarrow-pyarrow-*'" 46 | version_file = "src/geoarrow/pyarrow/_version.py" 47 | 48 | [tool.pytest.ini_options] 49 | consider_namespace_packages = true 50 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | docs: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 20 | - uses: r-lib/actions/setup-pandoc@v2 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: '3.11' 26 | cache: 'pip' 27 | 28 | - name: Install docs requirements 29 | run: | 30 | pip install -r docs/requirements.txt 31 | 32 | - name: Install 33 | run: | 34 | pushd geoarrow-pyarrow 35 | pip install ".[test]" 36 | popd 37 | 38 | pushd 
geoarrow-pandas 39 | pip install ".[test]" 40 | popd 41 | 42 | - name: Build docs 43 | run: | 44 | pushd docs 45 | pandoc ../README.md --from markdown --to rst -s -o source/README_generated.rst 46 | sphinx-build source _build/html 47 | 48 | - name: Upload built documentation 49 | uses: actions/upload-artifact@main 50 | with: 51 | name: docs 52 | path: docs/_build/html 53 | 54 | - name: Clone gh-pages branch 55 | if: success() && github.repository == 'geoarrow/geoarrow-python' && github.ref == 'refs/heads/main' 56 | uses: actions/checkout@v2 57 | with: 58 | ref: gh-pages 59 | path: pages-clone 60 | 61 | - name: Update development documentation 62 | if: success() && github.repository == 'geoarrow/geoarrow-python' && github.ref == 'refs/heads/main' 63 | env: 64 | DOC_TAG: "main" 65 | run: | 66 | git config --global user.email "actions@github.com" 67 | git config --global user.name "GitHub Actions" 68 | cd pages-clone 69 | if [ -d "$DOC_TAG" ]; then 70 | git rm -rf "$DOC_TAG" 71 | fi 72 | mkdir "$DOC_TAG" 73 | cp -R ../docs/_build/html/* "$DOC_TAG" 74 | git add * 75 | git commit --allow-empty -m"update documentation for tag $DOC_TAG" 76 | git push 77 | cd .. 78 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import sys 15 | import datetime 16 | 17 | 18 | sys.path.insert(0, os.path.abspath("..")) 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = "geoarrow" 23 | copyright = f"2023-{datetime.datetime.now().year} Dewey Dunnington" 24 | author = "Dewey Dunnington" 25 | 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | "sphinx.ext.autodoc", 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ["_templates"] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = ["README_generated.rst"] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = "pydata_sphinx_theme" 51 | 52 | html_theme_options = { 53 | "show_toc_level": 2, 54 | "use_edit_page_button": True, 55 | "external_links": [], 56 | } 57 | 58 | html_context = { 59 | "github_user": "geoarrow", 60 | "github_repo": "geoarrow-python", 61 | "github_version": "main", 62 | "doc_path": "docs/source", 63 | } 64 | 65 | html_sidebars = {"**": ["search-field", "sidebar-nav-bs"]} 66 | 67 | 68 | # Add any paths that contain custom static files (such as style sheets) here, 69 | # relative to this directory. They are copied after the builtin static files, 70 | # so a file named "default.css" will overwrite the builtin "default.css". 
71 | html_static_path = [] 72 | 73 | add_module_names = False 74 | -------------------------------------------------------------------------------- /geoarrow-types/tests/test_constants.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from geoarrow.types.constants import ( 3 | Encoding, 4 | GeometryType, 5 | EdgeType, 6 | ) 7 | 8 | 9 | def test_enum_create_from_input(): 10 | # Can create enum values from an enum value, a string, or None 11 | assert Encoding.create(Encoding.WKB) is Encoding.WKB 12 | assert Encoding.create("wkb") is Encoding.WKB 13 | assert Encoding.create(None) is Encoding.UNSPECIFIED 14 | 15 | with pytest.raises(KeyError): 16 | Encoding.create("not a valid option") 17 | 18 | with pytest.raises(TypeError): 19 | Encoding.create(b"123") 20 | 21 | 22 | def test_enum_default(): 23 | assert Encoding._coalesce2(Encoding.WKB, Encoding.UNSPECIFIED) is Encoding.WKB 24 | assert Encoding._coalesce2(Encoding.UNSPECIFIED, Encoding.WKB) is Encoding.WKB 25 | assert Encoding._coalesce2(Encoding.WKB, Encoding.WKT) is Encoding.WKB 26 | 27 | 28 | def test_enum_specified(): 29 | assert Encoding._coalesce_unspecified2(Encoding.WKB, Encoding.WKB) is Encoding.WKB 30 | assert ( 31 | Encoding._coalesce_unspecified2(Encoding.WKB, Encoding.UNSPECIFIED) 32 | is Encoding.WKB 33 | ) 34 | assert ( 35 | Encoding._coalesce_unspecified2(Encoding.UNSPECIFIED, Encoding.WKB) 36 | is Encoding.WKB 37 | ) 38 | 39 | with pytest.raises(ValueError): 40 | Encoding._coalesce_unspecified2(Encoding.WKB, Encoding.WKT) 41 | 42 | 43 | def test_enum_common2(): 44 | # Values equal 45 | assert Encoding._common2(Encoding.WKB, Encoding.WKB) is Encoding.WKB 46 | 47 | # One value unspecified 48 | assert Encoding._common2(Encoding.WKB, Encoding.UNSPECIFIED) is Encoding.WKB 49 | assert Encoding._common2(Encoding.UNSPECIFIED, Encoding.WKB) is Encoding.WKB 50 | 51 | # Values (or reversed values) in lookup table 52 | assert 
Encoding._common2(Encoding.WKB, Encoding.LARGE_WKB) is Encoding.LARGE_WKB 53 | assert Encoding._common2(Encoding.LARGE_WKB, Encoding.WKB) is Encoding.LARGE_WKB 54 | 55 | # No _common2 value 56 | assert EdgeType._common2(EdgeType.SPHERICAL, EdgeType.PLANAR) is None 57 | 58 | 59 | def test_encoding_serialized(): 60 | assert Encoding.WKB.is_serialized() is True 61 | assert Encoding.GEOARROW.is_serialized() is False 62 | 63 | 64 | def test_geometry_type_common2(): 65 | # Case handled by base enum 66 | assert ( 67 | GeometryType._common2(GeometryType.POINT, GeometryType.POINT) 68 | is GeometryType.POINT 69 | ) 70 | 71 | # Always fall back to geometry 72 | assert ( 73 | GeometryType._common2(GeometryType.POINT, GeometryType.LINESTRING) 74 | is GeometryType.GEOMETRY 75 | ) 76 | -------------------------------------------------------------------------------- /docs/source/pyarrow.rst: -------------------------------------------------------------------------------- 1 | 2 | geoarrow-pyarrow 3 | ================ 4 | 5 | .. automodule:: geoarrow.pyarrow 6 | 7 | Array constructors 8 | ------------------ 9 | 10 | .. autofunction:: array 11 | 12 | Type Constructors 13 | ----------------- 14 | 15 | .. autofunction:: wkb 16 | 17 | .. autofunction:: wkt 18 | 19 | .. autofunction:: large_wkb 20 | 21 | .. autofunction:: large_wkt 22 | 23 | .. autofunction:: point 24 | 25 | .. autofunction:: linestring 26 | 27 | .. autofunction:: polygon 28 | 29 | .. autofunction:: multipoint 30 | 31 | .. autofunction:: multilinestring 32 | 33 | .. autofunction:: multipolygon 34 | 35 | Compute functions 36 | ----------------- 37 | 38 | .. autofunction:: parse_all 39 | 40 | .. autofunction:: unique_geometry_types 41 | 42 | .. autofunction:: infer_type_common 43 | 44 | .. autofunction:: as_wkt 45 | 46 | .. autofunction:: as_wkb 47 | 48 | .. autofunction:: as_geoarrow 49 | 50 | .. autofunction:: format_wkt 51 | 52 | .. autofunction:: box 53 | 54 | .. autofunction:: box_agg 55 | 56 | .. 
autofunction:: rechunk 57 | 58 | .. autofunction:: with_coord_type 59 | 60 | .. autofunction:: with_edge_type 61 | 62 | .. autofunction:: with_crs 63 | 64 | .. autofunction:: with_dimensions 65 | 66 | .. autofunction:: with_geometry_type 67 | 68 | .. autofunction:: point_coords 69 | 70 | .. autofunction:: to_geopandas 71 | 72 | Class Reference 73 | --------------- 74 | 75 | .. autoclass:: GeometryExtensionType 76 | :members: 77 | 78 | .. autoclass:: WkbType 79 | :members: 80 | 81 | .. autoclass:: WktType 82 | :members: 83 | 84 | .. autoclass:: PointType 85 | :members: 86 | 87 | .. autoclass:: LinestringType 88 | :members: 89 | 90 | .. autoclass:: PolygonType 91 | :members: 92 | 93 | .. autoclass:: MultiPointType 94 | :members: 95 | 96 | .. autoclass:: MultiLinestringType 97 | :members: 98 | 99 | .. autoclass:: MultiPolygonType 100 | :members: 101 | 102 | IO helpers 103 | -------------------- 104 | 105 | .. automodule:: geoarrow.pyarrow.io 106 | 107 | .. autofunction:: read_pyogrio_table 108 | 109 | .. autofunction:: read_geoparquet_table 110 | 111 | .. autofunction:: write_geoparquet_table 112 | 113 | 114 | Dataset constructors 115 | -------------------- 116 | 117 | .. automodule:: geoarrow.pyarrow.dataset 118 | 119 | .. autofunction:: dataset 120 | 121 | .. autoclass:: geoarrow.pyarrow.dataset.GeoDataset 122 | :members: 123 | 124 | .. 
autoclass:: geoarrow.pyarrow.dataset.ParquetRowGroupGeoDataset 125 | :members: 126 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/tests/test_geopandas.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pyarrow as pa 3 | from geoarrow import types 4 | import geoarrow.pyarrow as ga 5 | 6 | 7 | geopandas = pytest.importorskip("geopandas") 8 | 9 | 10 | def test_from_geopandas(): 11 | geoseries = geopandas.GeoSeries.from_wkt(["POINT (30 10)"]).set_crs("OGC:CRS84") 12 | array = ga.array(geoseries) 13 | assert isinstance(array.type, ga.WkbType) 14 | assert "CRS84" in repr(array.type.crs) 15 | assert ga.format_wkt(array)[0].as_py() == "POINT (30 10)" 16 | 17 | 18 | def test_scalar_to_shapely(): 19 | array = ga.array(["POINT (30 10)"]) 20 | assert array[0].to_shapely().wkt == "POINT (30 10)" 21 | 22 | wkb_item = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3e\x40\x00\x00\x00\x00\x00\x00\x24\x40" 23 | array = ga.array([wkb_item]) 24 | assert array[0].to_shapely().wkt == "POINT (30 10)" 25 | 26 | 27 | def test_to_geopandas_unsupported_type(): 28 | # GeoPandas doesn't support geoarrow.wkt, so this goes through the branch 29 | # that handles any GeoPandas failure 30 | array = ga.as_wkt(["POINT (30 10)"]) 31 | geoseries = ga.to_geopandas(array) 32 | assert isinstance(geoseries, geopandas.GeoSeries) 33 | assert len(geoseries) == 1 34 | assert geoseries.to_wkt()[0] == "POINT (30 10)" 35 | 36 | 37 | def test_to_geopandas_using_geopandas(): 38 | array = ga.as_wkb(["POINT (30 10)"]) 39 | geoseries = ga.to_geopandas(array) 40 | assert isinstance(geoseries, geopandas.GeoSeries) 41 | assert len(geoseries) == 1 42 | assert geoseries.to_wkt()[0] == "POINT (30 10)" 43 | 44 | 45 | def test_to_geopandas_with_crs(): 46 | array = ga.with_crs(ga.as_wkt(["POINT (30 10)"]), types.OGC_CRS84) 47 | geoseries = ga.to_geopandas(array) 48 | assert isinstance(geoseries, 
geopandas.GeoSeries) 49 | assert len(geoseries) == 1 50 | assert geoseries.to_wkt()[0] == "POINT (30 10)" 51 | assert geoseries.crs.to_authority() == ("OGC", "CRS84") 52 | 53 | 54 | def test_to_geopandas_with_crs_using_geopandas(): 55 | array = ga.with_crs(ga.as_wkb(["POINT (30 10)"]), types.OGC_CRS84) 56 | geoseries = ga.to_geopandas(array) 57 | assert isinstance(geoseries, geopandas.GeoSeries) 58 | assert len(geoseries) == 1 59 | assert geoseries.to_wkt()[0] == "POINT (30 10)" 60 | assert geoseries.crs.to_authority() == ("OGC", "CRS84") 61 | 62 | 63 | def test_table_to_geopandas_unsupported_type(): 64 | # GeoPandas doesn't support geoarrow.wkt, so this goes through the branch 65 | # that handles any GeoPandas failure 66 | table = pa.table({"geom": ga.as_wkt(["POINT (30 10)"])}) 67 | gdf = ga.to_geopandas(table) 68 | assert isinstance(gdf, geopandas.GeoDataFrame) 69 | 70 | geoseries = gdf.geometry 71 | assert len(geoseries) == 1 72 | assert geoseries.to_wkt()[0] == "POINT (30 10)" 73 | 74 | 75 | def test_table_to_geopandas_using_geopandas(): 76 | table = pa.table({"geom": ga.as_wkb(["POINT (30 10)"])}) 77 | gdf = ga.to_geopandas(table) 78 | assert isinstance(gdf, geopandas.GeoDataFrame) 79 | 80 | geoseries = gdf.geometry 81 | assert len(geoseries) == 1 82 | assert geoseries.to_wkt()[0] == "POINT (30 10)" 83 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/src/geoarrow/pyarrow/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains pyarrow integration for the geoarrow Python bindings. 
3 | 4 | Examples 5 | -------- 6 | 7 | >>> import geoarrow.pyarrow as ga 8 | """ 9 | 10 | from geoarrow.types._version import __version__, __version_tuple__ # NOQA: F401 11 | 12 | from geoarrow.types import ( 13 | GeometryType, 14 | Dimensions, 15 | CoordType, 16 | EdgeType, 17 | Encoding, 18 | OGC_CRS84, 19 | ) 20 | 21 | from geoarrow.pyarrow._type import ( 22 | GeometryExtensionType, 23 | WktType, 24 | WkbType, 25 | PointType, 26 | LinestringType, 27 | PolygonType, 28 | MultiPointType, 29 | MultiLinestringType, 30 | MultiPolygonType, 31 | wkb, 32 | large_wkb, 33 | wkb_view, 34 | wkt, 35 | large_wkt, 36 | wkt_view, 37 | point, 38 | linestring, 39 | polygon, 40 | multipoint, 41 | multilinestring, 42 | multipolygon, 43 | extension_type, 44 | geometry_type_common, 45 | ) 46 | 47 | from geoarrow.types.type_pyarrow import ( 48 | register_extension_types, 49 | unregister_extension_types, 50 | ) 51 | 52 | from geoarrow.pyarrow._kernel import Kernel 53 | 54 | from geoarrow.pyarrow._array import array 55 | 56 | from geoarrow.pyarrow import _scalar 57 | 58 | from geoarrow.pyarrow._compute import ( 59 | parse_all, 60 | as_wkt, 61 | as_wkb, 62 | infer_type_common, 63 | as_geoarrow, 64 | format_wkt, 65 | make_point, 66 | unique_geometry_types, 67 | box, 68 | box_agg, 69 | with_coord_type, 70 | with_crs, 71 | with_dimensions, 72 | with_edge_type, 73 | with_geometry_type, 74 | rechunk, 75 | point_coords, 76 | to_geopandas, 77 | ) 78 | 79 | __all__ = [ 80 | "GeometryType", 81 | "Dimensions", 82 | "CoordType", 83 | "EdgeType", 84 | "Encoding", 85 | "GeometryExtensionType", 86 | "WktType", 87 | "WkbType", 88 | "OGC_CRS84", 89 | "PointType", 90 | "LinestringType", 91 | "PolygonType", 92 | "MultiPointType", 93 | "MultiLinestringType", 94 | "MultiPolygonType", 95 | "wkb", 96 | "large_wkb", 97 | "wkb_view", 98 | "wkt", 99 | "large_wkt", 100 | "wkt_view", 101 | "point", 102 | "linestring", 103 | "polygon", 104 | "multipoint", 105 | "multilinestring", 106 | "multipolygon", 107 | 
"extension_type", 108 | "geometry_type_common", 109 | "register_extension_types", 110 | "unregister_extension_types", 111 | "Kernel", 112 | "array", 113 | "parse_all", 114 | "as_wkt", 115 | "as_wkb", 116 | "infer_type_common", 117 | "as_geoarrow", 118 | "format_wkt", 119 | "make_point", 120 | "unique_geometry_types", 121 | "box", 122 | "box_agg", 123 | "with_coord_type", 124 | "with_crs", 125 | "with_dimensions", 126 | "with_edge_type", 127 | "with_geometry_type", 128 | "rechunk", 129 | "point_coords", 130 | "to_geopandas", 131 | "_scalar", 132 | ] 133 | 134 | try: 135 | register_extension_types() 136 | except Exception as e: 137 | import warnings 138 | 139 | warnings.warn( 140 | "Failed to register one or more extension types.\n" 141 | "If this warning appears from pytest, you may have to re-run with --import-mode=importlib.\n" 142 | "You may also be able to run `unregister_extension_types()` and `register_extension_types()`.\n" 143 | f"The original error was {e}" 144 | ) 145 | -------------------------------------------------------------------------------- /geoarrow-types/tests/test_crs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from geoarrow.types import crs 4 | 5 | 6 | def test_projjson_crs_from_string(): 7 | crs_obj = crs.ProjJsonCrs.from_json('{"some key": "some value"}') 8 | assert crs_obj.to_json() == '{"some key": "some value"}' 9 | assert crs_obj.to_json_dict() == {"some key": "some value"} 10 | 11 | 12 | def test_projjson_crs_from_dict(): 13 | crs_obj = crs.ProjJsonCrs.from_json_dict({"some key": "some value"}) 14 | assert crs_obj.to_json() == '{"some key": "some value"}' 15 | assert crs_obj.to_json_dict() == {"some key": "some value"} 16 | 17 | 18 | def test_projjson_crs_from_bytes(): 19 | crs_obj = crs.ProjJsonCrs('{"some key": "some value"}'.encode()) 20 | assert crs_obj.to_json() == '{"some key": "some value"}' 21 | 22 | 23 | def test_projjson_crs_from_crs(): 24 | crs_obj = 
crs.ProjJsonCrs.from_json('{"some key": "some value"}') 25 | crs_obj_from_crs = crs.ProjJsonCrs(crs_obj) 26 | assert crs_obj_from_crs.to_json() == crs_obj.to_json() 27 | 28 | 29 | def test_projjson_crs_repr(): 30 | crs_valid_projjson = crs.OGC_CRS84 31 | assert repr(crs_valid_projjson) == "ProjJsonCrs(OGC:CRS84)" 32 | 33 | crs_valid_json = crs.ProjJsonCrs('{"some key": "some value"}') 34 | assert repr(crs_valid_json) == 'ProjJsonCrs({"some key": "some value"})' 35 | 36 | # repr() shouldn't error here 37 | crs_invalid_json = crs.ProjJsonCrs('{"this is not valid json') 38 | assert repr(crs_invalid_json) == 'ProjJsonCrs({"this is not valid json)' 39 | 40 | 41 | def test_string_crs(): 42 | crs_obj = crs.StringCrs("arbitrary string") 43 | assert crs_obj.__geoarrow_crs_json_values__() == {"crs": "arbitrary string"} 44 | assert repr(crs_obj) == "StringCrs(arbitrary string)" 45 | 46 | 47 | def test_string_crs_quoted_json_string(): 48 | crs_obj = crs.StringCrs('"this is json"') 49 | assert crs_obj.__geoarrow_crs_json_values__() == {"crs": "this is json"} 50 | assert repr(crs_obj) == "StringCrs(this is json)" 51 | 52 | 53 | def test_string_crs_json_object(): 54 | crs_obj = crs.StringCrs('{"valid": "object"}') 55 | assert crs_obj.to_json() == '{"valid": "object"}' 56 | assert crs_obj.to_json_dict() == {"valid": "object"} 57 | 58 | 59 | def test_string_crs_pyproj(): 60 | pyproj = pytest.importorskip("pyproj") 61 | 62 | crs_obj = crs.StringCrs("OGC:CRS84") 63 | assert crs_obj.to_json_dict() == pyproj.CRS("OGC:CRS84").to_json_dict() 64 | assert crs_obj.to_json() == pyproj.CRS("OGC:CRS84").to_json() 65 | assert crs_obj.to_wkt() == pyproj.CRS("OGC:CRS84").to_wkt() 66 | 67 | 68 | def test_crs_coalesce(): 69 | assert crs._coalesce2(crs.UNSPECIFIED, crs.OGC_CRS84) is crs.OGC_CRS84 70 | assert crs._coalesce2(None, crs.OGC_CRS84) is None 71 | 72 | 73 | def test_crs_coalesce_unspecified(): 74 | assert crs._coalesce_unspecified2(crs.UNSPECIFIED, crs.OGC_CRS84) is crs.OGC_CRS84 75 | 
assert crs._coalesce_unspecified2(crs.OGC_CRS84, crs.UNSPECIFIED) is crs.OGC_CRS84 76 | assert crs._coalesce_unspecified2(crs.OGC_CRS84, crs.OGC_CRS84) is crs.OGC_CRS84 77 | 78 | ogc_crs84_clone = crs.ProjJsonCrs(crs.OGC_CRS84.to_json()) 79 | assert crs._coalesce_unspecified2(ogc_crs84_clone, crs.OGC_CRS84) is ogc_crs84_clone 80 | assert crs._coalesce_unspecified2(crs.OGC_CRS84, ogc_crs84_clone) is crs.OGC_CRS84 81 | 82 | with pytest.raises(ValueError): 83 | crs._coalesce_unspecified2(None, crs.OGC_CRS84) 84 | 85 | 86 | def test_crs_common(): 87 | assert crs._common2(crs.UNSPECIFIED, crs.OGC_CRS84) is crs.OGC_CRS84 88 | assert crs._common2(crs.OGC_CRS84, crs.UNSPECIFIED) is crs.OGC_CRS84 89 | assert crs._common2(crs.OGC_CRS84, crs.OGC_CRS84) is crs.OGC_CRS84 90 | 91 | with pytest.raises(ValueError): 92 | crs._common2(None, crs.OGC_CRS84) 93 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated version files 2 | _version.py 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | .DS_Store 33 | .vscode/ 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/#use-with-ide 116 | .pdm.toml 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/src/geoarrow/pyarrow/_scalar.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import pyarrow as pa 4 | from geoarrow.pyarrow._kernel import Kernel 5 | from geoarrow.types.type_pyarrow import GeometryExtensionType 6 | 7 | 8 | class GeometryExtensionScalar(pa.ExtensionScalar): 9 | def __repr__(self): 10 | # Before pyarrow 13.0.0 this will fail because constructing 11 | # arrays from scalars 12 | pa_version = [int(component) for component in pa.__version__.split(".")] 13 | if pa_version[0] < 13: 14 | return super().__repr__() 15 | 16 | # Pretty WKT printing needs geoarrow-c 17 | try: 18 | from geoarrow import c # noqa: F401 19 | except ImportError: 20 | return ( 21 | super().__repr__() 22 | + "\n" 23 | + "* pip install geoarrow-c for prettier printing of geometry scalars" 24 | ) 25 | 26 | max_width = 70 27 | 28 | try: 29 | kernel = Kernel.format_wkt(self.type, max_element_size_bytes=max_width) 30 | array_formatted = kernel.push(self._array1()) 31 | string_formatted = array_formatted[0].as_py() 32 | except Exception: 33 | string_formatted = "" 34 | 35 | if len(string_formatted) >= max_width: 36 | string_formatted = string_formatted[: (max_width - 3)] + "..." 
37 | 38 | return f"{type(self).__name__}:{repr(self.type)}\n<{string_formatted}>" 39 | 40 | def _array1(self): 41 | return self.type.wrap_array(pa.array([self.value])) 42 | 43 | @property 44 | def wkt(self): 45 | kernel = Kernel.as_wkt(self.type) 46 | array_wkt = kernel.push(self._array1()) 47 | return array_wkt.storage[0].as_py() 48 | 49 | @property 50 | def wkb(self): 51 | kernel = Kernel.as_wkb(self.type) 52 | array_wkb = kernel.push(self._array1()) 53 | return array_wkb.storage[0].as_py() 54 | 55 | def to_shapely(self): 56 | """ 57 | Convert an array item to a shapely geometry 58 | 59 | >>> import geoarrow.pyarrow as ga 60 | >>> array = ga.array(["POINT (30 10)"]) 61 | >>> array[0].to_shapely() 62 | 63 | """ 64 | from shapely import from_wkb 65 | 66 | return from_wkb(self.wkb) 67 | 68 | 69 | class WktScalar(GeometryExtensionScalar): 70 | @property 71 | def wkt(self): 72 | return self.value.as_py() 73 | 74 | def to_shapely(self): 75 | from shapely import from_wkt 76 | 77 | return from_wkt(self.value.as_py()) 78 | 79 | 80 | class WkbScalar(GeometryExtensionScalar): 81 | @property 82 | def wkb(self): 83 | return self.value.as_py() 84 | 85 | 86 | class BoxScalar(GeometryExtensionScalar): 87 | @property 88 | def bounds(self) -> dict: 89 | return self.as_py() 90 | 91 | @property 92 | def xmin(self) -> float: 93 | return self.bounds["xmin"] 94 | 95 | @property 96 | def ymin(self) -> float: 97 | return self.bounds["ymin"] 98 | 99 | @property 100 | def xmax(self) -> float: 101 | return self.bounds["xmax"] 102 | 103 | @property 104 | def ymax(self) -> float: 105 | return self.bounds["ymax"] 106 | 107 | @property 108 | def zmin(self) -> Optional[float]: 109 | return self.bounds["zmin"] if "zmin" in self.bounds else None 110 | 111 | @property 112 | def zmax(self) -> Optional[float]: 113 | return self.bounds["zmax"] if "zmax" in self.bounds else None 114 | 115 | @property 116 | def mmin(self) -> Optional[float]: 117 | return self.bounds["mmin"] if "mmin" in self.bounds else 
None 118 | 119 | @property 120 | def mmax(self) -> Optional[float]: 121 | return self.bounds["mmax"] if "mmax" in self.bounds else None 122 | 123 | def __repr__(self) -> str: 124 | return f"BoxScalar({self.bounds})" 125 | 126 | 127 | def scalar_cls_from_name(name): 128 | if name == "geoarrow.wkb": 129 | return WkbScalar 130 | elif name == "geoarrow.wkt": 131 | return WktScalar 132 | elif name == "geoarrow.box": 133 | return BoxScalar 134 | else: 135 | return GeometryExtensionScalar 136 | 137 | 138 | # Inject array_cls_from_name exactly once to avoid circular import 139 | if GeometryExtensionType._scalar_cls_from_name is None: 140 | GeometryExtensionType._scalar_cls_from_name = scalar_cls_from_name 141 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | test: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | os: [ubuntu-latest] 20 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | with: 25 | fetch-depth: 0 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | cache: 'pip' 32 | 33 | - name: Install (geoarrow-pyarrow) 34 | run: | 35 | pushd geoarrow-pyarrow 36 | pip install ".[test]" 37 | 38 | - name: Install (geoarrow-types) 39 | run: | 40 | pushd geoarrow-types 41 | pip install ".[test]" 42 | 43 | - name: Install (geoarrow-pandas) 44 | run: | 45 | pushd geoarrow-pandas 46 | pip install ".[test]" 47 | 48 | - name: Run tests (geoarrow-pyarrow) 49 | run: | 50 | pytest geoarrow-pyarrow/tests -v -s 51 | 52 | - name: Run tests (geoarrow-types) 53 | run: | 54 | pytest 
geoarrow-types/tests -v -s 55 | 56 | - name: Run tests (geoarrow-pandas) 57 | run: | 58 | pytest geoarrow-pandas/tests -v -s 59 | 60 | - name: Run doctests (geoarrow-pyarrow) 61 | if: success() && matrix.python-version == '3.13' 62 | run: | 63 | cd geoarrow-pyarrow 64 | pytest --pyargs geoarrow.pyarrow --doctest-modules --import-mode=importlib 65 | 66 | - name: Run doctests (geoarrow-types) 67 | if: success() && matrix.python-version == '3.13' 68 | run: | 69 | cd geoarrow-types 70 | pytest --pyargs geoarrow.types --doctest-modules --import-mode=importlib 71 | 72 | - name: Run doctests (geoarrow-pandas) 73 | if: success() && matrix.python-version == '3.13' 74 | run: | 75 | cd geoarrow-pandas 76 | pytest --pyargs geoarrow.pandas --doctest-modules --import-mode=importlib 77 | 78 | # This is a test of geoarrow-types on Python 3.7 (which implies pyarrow 12 79 | # since this is the last supported version there). Python 3.7 is still the 80 | # runtime available on some hosted platforms (e.g., it is the minimum required 81 | # version for apache-sedona Python) 82 | oldest-supported: 83 | runs-on: ubuntu-latest 84 | container: 85 | image: python:3.7 86 | 87 | steps: 88 | - uses: actions/checkout@v4 89 | with: 90 | fetch-depth: 0 91 | fetch-tags: true 92 | 93 | - name: Check git setup 94 | run: | 95 | git config --global --add safe.directory "$(pwd)" 96 | git describe --long --match='geoarrow-types-*' 97 | 98 | # setuptools_scm available for Python 3.7 does not support version_file 99 | # (it can still be installed on Python 3.7, it just can't be built there 100 | # without this modification) 101 | - name: Patch pyproject.toml 102 | run: | 103 | cd geoarrow-types 104 | sed -i.bak '/^version_file/d' pyproject.toml 105 | echo '__version__ = "0.0.0"' > src/geoarrow/types/_version.py 106 | echo '__version_tuple__ = (0, 0, 0)' >> src/geoarrow/types/_version.py 107 | 108 | - name: Install (geoarrow-types) 109 | run: | 110 | pip install --upgrade setuptools setuptools_scm 111 | cd 
geoarrow-types 112 | pip install ".[test]" 113 | 114 | - name: Run tests (geoarrow-types) 115 | run: | 116 | pytest geoarrow-types/tests -v -s 117 | 118 | coverage: 119 | needs: [test] 120 | 121 | runs-on: ubuntu-latest 122 | 123 | steps: 124 | - uses: actions/checkout@v4 125 | with: 126 | fetch-depth: 0 127 | 128 | - name: Set up Python 129 | uses: actions/setup-python@v4 130 | with: 131 | python-version: '3.13' 132 | cache: 'pip' 133 | 134 | - name: Install (geoarrow-pyarrow) 135 | run: | 136 | pushd geoarrow-pyarrow 137 | pip install -e ".[test]" 138 | 139 | - name: Install (geoarrow-types) 140 | run: | 141 | pushd geoarrow-types 142 | pip install -e ".[test]" 143 | 144 | - name: Install (geoarrow-pandas) 145 | run: | 146 | pushd geoarrow-pandas 147 | pip install -e ".[test]" 148 | 149 | - name: Install coverage dependencies 150 | run: | 151 | pip install pytest-cov 152 | 153 | - name: Install editable 154 | run: | 155 | pip install -e geoarrow-pyarrow/ 156 | pip install -e geoarrow-types/ 157 | pip install -e geoarrow-pandas/ 158 | 159 | - name: Coverage 160 | run: | 161 | pushd geoarrow-pyarrow 162 | python -m pytest --cov ./src/geoarrow tests --import-mode=importlib 163 | python -m coverage xml 164 | popd 165 | 166 | pushd geoarrow-types 167 | python -m pytest --cov ./src/geoarrow tests --import-mode=importlib 168 | python -m coverage xml 169 | popd 170 | 171 | pushd geoarrow-pandas 172 | python -m pytest --cov ./src/geoarrow tests --import-mode=importlib 173 | python -m coverage xml 174 | popd 175 | 176 | - name: Upload coverage to codecov 177 | uses: codecov/codecov-action@v2 178 | with: 179 | files: 'geoarrow-pyarrow/coverage.xml,geoarrow-types/coverage.xml,geoarrow-pandas/coverage.xml' 180 | token: ${{ secrets.CODECOV_TOKEN }} 181 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/src/geoarrow/pyarrow/_kernel.py: -------------------------------------------------------------------------------- 1 | import 
sys 2 | 3 | import pyarrow as pa 4 | from geoarrow.types import box as box_spec 5 | from geoarrow.pyarrow._type import GeometryExtensionType 6 | 7 | 8 | _lazy_lib = None 9 | _geoarrow_c_version = None 10 | 11 | 12 | def _geoarrow_c(): 13 | global _lazy_lib, _geoarrow_c_version 14 | if _lazy_lib is None: 15 | try: 16 | import geoarrow.c 17 | 18 | except ImportError as e: 19 | raise ImportError("Requested operation requires geoarrow-c") from e 20 | 21 | _lazy_lib = geoarrow.c.lib 22 | if hasattr(geoarrow.c, "__version_tuple__"): 23 | _geoarrow_c_version = geoarrow.c.__version_tuple__ 24 | else: 25 | _geoarrow_c_version = (0, 1, 0) 26 | 27 | return _lazy_lib 28 | 29 | 30 | class Kernel: 31 | def __init__(self, name, type_in, **kwargs) -> None: 32 | if not isinstance(type_in, pa.DataType): 33 | raise TypeError("Expected `type_in` to inherit from pyarrow.DataType") 34 | 35 | self._name = str(name) 36 | self._kernel = _geoarrow_c().CKernel(self._name.encode("UTF-8")) 37 | # True for all the kernels that currently exist 38 | self._is_agg = self._name.endswith("_agg") 39 | 40 | type_in_schema = _geoarrow_c().SchemaHolder() 41 | type_in._export_to_c(type_in_schema._addr()) 42 | 43 | options = Kernel._pack_options(kwargs) 44 | 45 | type_out_schema = self._kernel.start(type_in_schema, options) 46 | self._type_out = GeometryExtensionType._import_from_c(type_out_schema._addr()) 47 | self._type_in = type_in 48 | 49 | def push(self, arr): 50 | if isinstance(arr, pa.ChunkedArray) and self._is_agg: 51 | for chunk_in in arr.chunks: 52 | self.push(chunk_in) 53 | return 54 | elif isinstance(arr, pa.ChunkedArray): 55 | chunks_out = [] 56 | for chunk_in in arr.chunks: 57 | chunks_out.append(self.push(chunk_in)) 58 | return pa.chunked_array(chunks_out, type=self._type_out) 59 | elif not isinstance(arr, pa.Array): 60 | raise TypeError( 61 | f"Expected pyarrow.Array or pyarrow.ChunkedArray but got {type(arr)}" 62 | ) 63 | 64 | array_in = _geoarrow_c().ArrayHolder() 65 | 
arr._export_to_c(array_in._addr()) 66 | 67 | if self._is_agg: 68 | self._kernel.push_batch_agg(array_in) 69 | else: 70 | array_out = self._kernel.push_batch(array_in) 71 | return pa.Array._import_from_c(array_out._addr(), self._type_out) 72 | 73 | def finish(self): 74 | if self._is_agg: 75 | out = self._kernel.finish_agg() 76 | return pa.Array._import_from_c(out._addr(), self._type_out) 77 | else: 78 | self._kernel.finish() 79 | 80 | @staticmethod 81 | def void(type_in): 82 | return Kernel("void", type_in) 83 | 84 | @staticmethod 85 | def void_agg(type_in): 86 | return Kernel("void_agg", type_in) 87 | 88 | @staticmethod 89 | def visit_void_agg(type_in): 90 | return Kernel("visit_void_agg", type_in) 91 | 92 | @staticmethod 93 | def as_wkt(type_in): 94 | return Kernel.as_geoarrow(type_in, 100003) 95 | 96 | @staticmethod 97 | def as_wkb(type_in): 98 | return Kernel.as_geoarrow(type_in, 100001) 99 | 100 | @staticmethod 101 | def format_wkt(type_in, precision=None, max_element_size_bytes=None): 102 | return Kernel( 103 | "format_wkt", 104 | type_in, 105 | precision=precision, 106 | max_element_size_bytes=max_element_size_bytes, 107 | ) 108 | 109 | @staticmethod 110 | def as_geoarrow(type_in, type_id): 111 | return Kernel("as_geoarrow", type_in, type=int(type_id)) 112 | 113 | @staticmethod 114 | def unique_geometry_types_agg(type_in): 115 | return Kernel("unique_geometry_types_agg", type_in) 116 | 117 | @staticmethod 118 | def box(type_in): 119 | kernel = Kernel("box", type_in) 120 | if _geoarrow_c_version <= (0, 1, 3): 121 | return BoxKernelCompat(kernel) 122 | else: 123 | return kernel 124 | 125 | @staticmethod 126 | def box_agg(type_in): 127 | kernel = Kernel("box_agg", type_in) 128 | if _geoarrow_c_version <= (0, 1, 3): 129 | return BoxKernelCompat(kernel) 130 | else: 131 | return kernel 132 | 133 | @staticmethod 134 | def _pack_options(options): 135 | if not options: 136 | return b"" 137 | 138 | options = {k: v for k, v in options.items() if v is not None} 139 | 
bytes = len(options).to_bytes(4, sys.byteorder, signed=True) 140 | for k, v in options.items(): 141 | k = str(k) 142 | bytes += len(k).to_bytes(4, sys.byteorder, signed=True) 143 | bytes += k.encode("UTF-8") 144 | 145 | v = str(v) 146 | bytes += len(v).to_bytes(4, sys.byteorder, signed=True) 147 | bytes += v.encode("UTF-8") 148 | 149 | return bytes 150 | 151 | 152 | class BoxKernelCompat: 153 | """A wrapper around the "box" kernel that works for geoarrow-c 0.1. 154 | This is mostly to ease the transition for geoarrow-python CI while 155 | all the packages are being updated.""" 156 | 157 | def __init__(self, parent: Kernel): 158 | self.parent = parent 159 | self.type_out = box_spec().to_pyarrow().with_crs(parent._type_in.crs) 160 | 161 | def push(self, arr): 162 | parent_result = self.parent.push(arr) 163 | return ( 164 | None if parent_result is None else self._old_box_to_new_box(parent_result) 165 | ) 166 | 167 | def finish(self): 168 | return self._old_box_to_new_box(self.parent.finish()) 169 | 170 | def _old_box_to_new_box(self, array): 171 | xmin, xmax, ymin, ymax = array.flatten() 172 | storage = pa.StructArray.from_arrays( 173 | [xmin, ymin, xmax, ymax], names=["xmin", "ymin", "xmax", "ymax"] 174 | ) 175 | return self.type_out.wrap_array(storage) 176 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/src/geoarrow/pyarrow/_type.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | import pyarrow as pa 3 | 4 | from geoarrow import types 5 | from geoarrow.types.type_pyarrow import ( 6 | GeometryExtensionType, 7 | PointType, 8 | LinestringType, 9 | PolygonType, 10 | MultiPointType, 11 | MultiLinestringType, 12 | MultiPolygonType, 13 | WkbType, 14 | WktType, 15 | extension_type, 16 | ) 17 | 18 | 19 | def wkb() -> WkbType: 20 | """Well-known binary with a maximum array size of 2 GB per chunk. 
21 | 22 | >>> import geoarrow.pyarrow as ga 23 | >>> ga.wkb() 24 | WkbType(geoarrow.wkb) 25 | >>> ga.wkb().storage_type 26 | DataType(binary) 27 | """ 28 | return WkbType.__arrow_ext_deserialize__(pa.binary(), b"") 29 | 30 | 31 | def large_wkb() -> WkbType: 32 | """Well-known binary using 64-bit integer offsets. 33 | 34 | >>> import geoarrow.pyarrow as ga 35 | >>> ga.large_wkb() 36 | WkbType(geoarrow.wkb) 37 | >>> ga.large_wkb().storage_type 38 | DataType(large_binary) 39 | """ 40 | return WkbType.__arrow_ext_deserialize__(pa.large_binary(), b"") 41 | 42 | 43 | def wkb_view() -> WkbType: 44 | """Well-known binary using binary views as the underlying storage. 45 | 46 | >>> import geoarrow.pyarrow as ga 47 | >>> ga.wkb_view() 48 | WkbType(geoarrow.wkb) 49 | >>> ga.wkb_view().storage_type 50 | DataType(binary_view) 51 | """ 52 | return WkbType.__arrow_ext_deserialize__(pa.binary_view(), b"") 53 | 54 | 55 | def wkt() -> WktType: 56 | """Well-known text with a maximum array size of 2 GB per chunk. 57 | 58 | >>> import geoarrow.pyarrow as ga 59 | >>> ga.wkt() 60 | WktType(geoarrow.wkt) 61 | >>> ga.wkt().storage_type 62 | DataType(string) 63 | """ 64 | return WktType.__arrow_ext_deserialize__(pa.utf8(), b"") 65 | 66 | 67 | def large_wkt() -> WktType: 68 | """Well-known text using 64-bit integer offsets. 69 | 70 | >>> import geoarrow.pyarrow as ga 71 | >>> ga.large_wkt() 72 | WktType(geoarrow.wkt) 73 | >>> ga.large_wkt().storage_type 74 | DataType(large_string) 75 | """ 76 | return WktType.__arrow_ext_deserialize__(pa.large_utf8(), b"") 77 | 78 | 79 | def wkt_view() -> WktType: 80 | """Well-known text using string views as the underlying storage. 81 | 82 | >>> import geoarrow.pyarrow as ga 83 | >>> ga.wkt_view() 84 | WktType(geoarrow.wkt) 85 | >>> ga.wkt_view().storage_type 86 | DataType(string_view) 87 | """ 88 | return WktType.__arrow_ext_deserialize__(pa.string_view(), b"") 89 | 90 | 91 | def point() -> PointType: 92 | """Geoarrow-encoded point features. 
93 | 94 | >>> import geoarrow.pyarrow as ga 95 | >>> ga.point() 96 | PointType(geoarrow.point) 97 | >>> ga.point().storage_type 98 | StructType(struct) 99 | """ 100 | return extension_type(types.point()) 101 | 102 | 103 | def linestring() -> LinestringType: 104 | """Geoarrow-encoded line features. 105 | 106 | >>> import geoarrow.pyarrow as ga 107 | >>> ga.linestring() 108 | LinestringType(geoarrow.linestring) 109 | >>> ga.linestring().storage_type 110 | ListType(list not null>) 111 | """ 112 | return extension_type(types.linestring()) 113 | 114 | 115 | def polygon() -> PolygonType: 116 | """Geoarrow-encoded polygon features. 117 | 118 | >>> import geoarrow.pyarrow as ga 119 | >>> ga.polygon() 120 | PolygonType(geoarrow.polygon) 121 | >>> ga.polygon().storage_type 122 | ListType(list not null> not null>) 123 | """ 124 | return extension_type(types.polygon()) 125 | 126 | 127 | def multipoint() -> MultiPointType: 128 | """Geoarrow-encoded multipoint features. 129 | 130 | >>> import geoarrow.pyarrow as ga 131 | >>> ga.multipoint() 132 | MultiPointType(geoarrow.multipoint) 133 | >>> ga.multipoint().storage_type 134 | ListType(list not null>) 135 | """ 136 | return extension_type(types.multipoint()) 137 | 138 | 139 | def multilinestring() -> MultiLinestringType: 140 | """Geoarrow-encoded multilinestring features. 141 | 142 | >>> import geoarrow.pyarrow as ga 143 | >>> ga.multilinestring() 144 | MultiLinestringType(geoarrow.multilinestring) 145 | >>> ga.multilinestring().storage_type 146 | ListType(list not null> not null>) 147 | """ 148 | return extension_type(types.multilinestring()) 149 | 150 | 151 | def multipolygon() -> MultiPolygonType: 152 | """Geoarrow-encoded polygon features. 
153 | 154 | >>> import geoarrow.pyarrow as ga 155 | >>> ga.multipolygon() 156 | MultiPolygonType(geoarrow.multipolygon) 157 | >>> ga.multipolygon().storage_type 158 | ListType(list not null> not null> not null>) 159 | """ 160 | return extension_type(types.multipolygon()) 161 | 162 | 163 | def geometry_type_common( 164 | type_objects: Iterable[GeometryExtensionType], 165 | ) -> GeometryExtensionType: 166 | """Compute common type 167 | 168 | From a sequence of GeoArrow types, return a type to which all can be cast 169 | or error if this cannot occur. 170 | 171 | >>> import geoarrow.pyarrow as ga 172 | >>> ga.geometry_type_common([ga.wkb(), ga.point()]) 173 | WkbType(geoarrow.wkb) 174 | >>> ga.geometry_type_common([ga.point(), ga.point()]) 175 | PointType(geoarrow.point) 176 | """ 177 | type_objects = list(type_objects) 178 | 179 | if len(type_objects) == 0: 180 | # Would be nice to have an empty type option here 181 | return wkb() 182 | elif len(type_objects) == 1: 183 | return type_objects[0] 184 | 185 | specs = [t.spec for t in type_objects] 186 | spec = types.TypeSpec.common(*specs).canonicalize() 187 | 188 | if ( 189 | spec.encoding == types.Encoding.GEOARROW 190 | and spec.geometry_type == types.GeometryType.GEOMETRY 191 | ): 192 | spec = types.TypeSpec.coalesce(types.wkb(), spec) 193 | 194 | return extension_type(spec) 195 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/src/geoarrow/pyarrow/_array.py: -------------------------------------------------------------------------------- 1 | import pyarrow as pa 2 | 3 | from geoarrow.pyarrow._kernel import Kernel, _geoarrow_c 4 | from geoarrow.pyarrow._type import ( 5 | GeometryExtensionType, 6 | wkb, 7 | wkt, 8 | large_wkb, 9 | large_wkt, 10 | ) 11 | 12 | 13 | class GeometryExtensionArray(pa.ExtensionArray): 14 | def geobuffers(self): 15 | import numpy as np 16 | 17 | lib = _geoarrow_c() 18 | 19 | cschema = lib.SchemaHolder() 20 | 
self.type._export_to_c(cschema._addr()) 21 | carray = lib.ArrayHolder() 22 | self._export_to_c(carray._addr()) 23 | 24 | array_view = lib.CArrayView(carray, cschema) 25 | buffers = array_view.buffers() 26 | return [np.array(b) if b is not None else None for b in buffers] 27 | 28 | def __repr__(self): 29 | # Pretty WKT printing needs geoarrow-c 30 | try: 31 | from geoarrow import c # noqa: F401 32 | except ImportError: 33 | return ( 34 | super().__repr__() 35 | + "\n" 36 | + "* pip install geoarrow-c for prettier printing of geometry arrays" 37 | ) 38 | 39 | n_values_to_show = 10 40 | max_width = 70 41 | 42 | if len(self) > n_values_to_show: 43 | n_extra = len(self) - n_values_to_show 44 | value_s = "values" if n_extra != 1 else "value" 45 | head = self[: int(n_values_to_show / 2)] 46 | mid = f"...{n_extra} {value_s}..." 47 | tail = self[int(-n_values_to_show / 2) :] 48 | else: 49 | head = self 50 | mid = "" 51 | tail = self[:0] 52 | 53 | try: 54 | kernel = Kernel.format_wkt(self.type, max_element_size_bytes=max_width) 55 | head = kernel.push(head) 56 | tail = kernel.push(tail) 57 | except Exception as e: 58 | err = f"* 1 or more display values failed to parse\n* {str(e)}" 59 | type_name = type(self).__name__ 60 | super_repr = super().__repr__() 61 | return f"{type_name}:{repr(self.type)}[{len(self)}]\n{err}\n{super_repr}" 62 | 63 | head_str = [f"<{item.as_py()}>" for item in head] 64 | tail_str = [f"<{item.as_py()}>" for item in tail] 65 | for i in range(len(head)): 66 | if len(head_str[i]) > max_width: 67 | head_str[i] = f"{head_str[i][: (max_width - 4)]}...>" 68 | for i in range(len(tail)): 69 | if len(tail_str[i]) > max_width: 70 | tail_str[i] = f"{tail_str[i][: (max_width - 4)]}...>" 71 | 72 | type_name = type(self).__name__ 73 | head_str = "\n".join(head_str) 74 | tail_str = "\n".join(tail_str) 75 | items_str = f"{head_str}\n{mid}\n{tail_str}" 76 | 77 | return f"{type_name}:{repr(self.type)}[{len(self)}]\n{items_str}".strip() 78 | 79 | 80 | class 
BoxArray(GeometryExtensionArray): 81 | def __repr__(self): 82 | type_name = type(self).__name__ 83 | items_str = "\n".join(repr(item.bounds) for item in self) 84 | return f"{type_name}:{repr(self.type)}[{len(self)}]\n{items_str}".strip() 85 | 86 | 87 | def array_cls_from_name(name): 88 | if name == "geoarrow.box": 89 | return BoxArray 90 | else: 91 | return GeometryExtensionArray 92 | 93 | 94 | # Inject array_cls_from_name exactly once to avoid circular import 95 | if GeometryExtensionType._array_cls_from_name is None: 96 | GeometryExtensionType._array_cls_from_name = array_cls_from_name 97 | 98 | 99 | def array(obj, type_=None, *args, **kwargs) -> GeometryExtensionArray: 100 | """Attempt to create an Array or ChunkedArray with a geoarrow extension type 101 | from ``obj``. This constructor attempts to perform the fewest transformations 102 | possible (i.e., WKB is left as WKB, WKT is left as WKT), whereas 103 | :func:`geoarrow.pyarrow.as_geoarrow` actively attempts a conversion to 104 | a geoarrow-encoding based on a common geometry type. GeoPandas objects are 105 | supported. This implementation relies heavily on ``pyarrow.array()`` and has 106 | similar behaviour. 
107 | 108 | >>> import geoarrow.pyarrow as ga 109 | >>> ga.array(["POINT (0 1)"]) 110 | GeometryExtensionArray:WktType(geoarrow.wkt)[1] 111 | 112 | >>> ga.as_geoarrow(["POINT (0 1)"]) 113 | GeometryExtensionArray:PointType(geoarrow.point)[1] 114 | 115 | """ 116 | # Convert GeoPandas to WKB 117 | if type(obj).__name__ == "GeoSeries": 118 | if obj.crs: 119 | type_ = wkb().with_crs(obj.crs) 120 | else: 121 | type_ = wkb() 122 | 123 | # Prefer ISO WKB 124 | obj = obj.to_wkb(flavor="iso") 125 | 126 | # Convert obj to array if it isn't already one 127 | if isinstance(obj, pa.Array) or isinstance(obj, pa.ChunkedArray): 128 | arr = obj 129 | else: 130 | arr = pa.array(obj, *args, **kwargs) 131 | 132 | # Handle the case where we get to pick the type 133 | if type_ is None: 134 | if isinstance(arr.type, GeometryExtensionType): 135 | return arr 136 | elif arr.type == pa.utf8(): 137 | return wkt().wrap_array(arr) 138 | elif arr.type == pa.large_utf8(): 139 | return large_wkt().wrap_array(arr) 140 | elif arr.type == pa.binary(): 141 | return wkb().wrap_array(arr) 142 | elif arr.type == pa.large_binary(): 143 | return large_wkb().wrap_array(arr) 144 | else: 145 | raise TypeError( 146 | f"Can't create geoarrow.array from Arrow array of type {type_}" 147 | ) 148 | 149 | # Handle the case where the type requested is already the correct type 150 | if type_ == arr.type: 151 | return arr 152 | 153 | type_is_geoarrow = isinstance(type_, GeometryExtensionType) 154 | type_is_wkb_or_wkt = type_.extension_name in ("geoarrow.wkt", "geoarrow.wkb") 155 | 156 | if type_is_geoarrow and type_is_wkb_or_wkt: 157 | arr = arr.cast(type_.storage_type) 158 | return type_.wrap_array(arr) 159 | 160 | # Eventually we will be able to handle more types (e.g., parse wkt or wkb 161 | # into a geoarrow type) 162 | raise TypeError(f"Can't create geoarrow.array for type {type_}") 163 | -------------------------------------------------------------------------------- 
/geoarrow-pandas/tests/test_geoarrow_pandas_suite.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas.tests.extension import base 3 | import geoarrow.pandas as gapd 4 | import geoarrow.pyarrow as ga 5 | import operator 6 | 7 | from pandas import ( 8 | Series, 9 | options, 10 | ) 11 | 12 | 13 | @pytest.fixture 14 | def dtype(): 15 | """A fixture providing the ExtensionDtype to validate.""" 16 | return gapd.GeoArrowExtensionDtype(ga.wkt()) 17 | 18 | 19 | @pytest.fixture 20 | def data(): 21 | """ 22 | Length-100 array for this type. 23 | 24 | * data[0] and data[1] should both be non missing 25 | * data[0] and data[1] should not be equal 26 | """ 27 | strings = [f"POINT ({i} {i + 1})" for i in range(100)] 28 | return gapd.GeoArrowExtensionArray(ga.array(strings)) 29 | 30 | 31 | @pytest.fixture 32 | def data_for_twos(): 33 | """Length-100 array in which all the elements are two.""" 34 | pytest.skip() 35 | 36 | 37 | @pytest.fixture 38 | def data_missing(): 39 | """Length-2 array with [NA, Valid]""" 40 | return gapd.GeoArrowExtensionArray([None, "POINT (0 1)"]) 41 | 42 | 43 | @pytest.fixture(params=["data", "data_missing"]) 44 | def all_data(request, data, data_missing): 45 | """Parametrized fixture giving 'data' and 'data_missing'""" 46 | if request.param == "data": 47 | return data 48 | elif request.param == "data_missing": 49 | return data_missing 50 | 51 | 52 | @pytest.fixture 53 | def data_repeated(data): 54 | """ 55 | Generate many datasets. 56 | 57 | Parameters 58 | ---------- 59 | data : fixture implementing `data` 60 | 61 | Returns 62 | ------- 63 | Callable[[int], Generator]: 64 | A callable that takes a `count` argument and 65 | returns a generator yielding `count` datasets. 66 | """ 67 | 68 | def gen(count): 69 | for _ in range(count): 70 | yield data 71 | 72 | return gen 73 | 74 | 75 | @pytest.fixture 76 | def data_for_sorting(): 77 | """ 78 | Length-3 array with a known sort order. 
79 | 80 | This should be three items [B, C, A] with 81 | A < B < C 82 | """ 83 | pytest.skip() 84 | 85 | 86 | @pytest.fixture 87 | def data_missing_for_sorting(): 88 | """ 89 | Length-3 array with a known sort order. 90 | 91 | This should be three items [B, NA, A] with 92 | A < B and NA missing. 93 | """ 94 | pytest.skip() 95 | 96 | 97 | @pytest.fixture 98 | def na_cmp(): 99 | """ 100 | Binary operator for comparing NA values. 101 | 102 | Should return a function of two arguments that returns 103 | True if both arguments are (scalar) NA for your type. 104 | 105 | By default, uses ``operator.is_`` 106 | """ 107 | return operator.is_ 108 | 109 | 110 | @pytest.fixture 111 | def na_value(): 112 | """The scalar missing value for this type. Default 'None'""" 113 | return None 114 | 115 | 116 | @pytest.fixture 117 | def data_for_grouping(): 118 | """ 119 | Data for factorization, grouping, and unique tests. 120 | 121 | Expected to be like [B, B, NA, NA, A, A, B, C] 122 | 123 | Where A < B < C and NA is missing 124 | """ 125 | pytest.skip() 126 | 127 | 128 | @pytest.fixture(params=[True, False]) 129 | def box_in_series(request): 130 | """Whether to box the data in a Series""" 131 | return request.param 132 | 133 | 134 | @pytest.fixture( 135 | params=[ 136 | lambda x: 1, 137 | lambda x: [1] * len(x), 138 | lambda x: Series([1] * len(x)), 139 | lambda x: x, 140 | ], 141 | ids=["scalar", "list", "series", "object"], 142 | ) 143 | def groupby_apply_op(request): 144 | """ 145 | Functions to test groupby.apply(). 146 | """ 147 | return request.param 148 | 149 | 150 | @pytest.fixture(params=[True, False]) 151 | def as_frame(request): 152 | """ 153 | Boolean fixture to support Series and Series.to_frame() comparison testing. 154 | """ 155 | return request.param 156 | 157 | 158 | @pytest.fixture(params=[True, False]) 159 | def as_series(request): 160 | """ 161 | Boolean fixture to support arr and Series(arr) comparison testing. 
162 | """ 163 | return request.param 164 | 165 | 166 | @pytest.fixture(params=[True, False]) 167 | def use_numpy(request): 168 | """ 169 | Boolean fixture to support comparison testing of ExtensionDtype array 170 | and numpy array. 171 | """ 172 | return request.param 173 | 174 | 175 | @pytest.fixture(params=["ffill", "bfill"]) 176 | def fillna_method(request): 177 | """ 178 | Parametrized fixture giving method parameters 'ffill' and 'bfill' for 179 | Series.fillna(method=) testing. 180 | """ 181 | return request.param 182 | 183 | 184 | @pytest.fixture(params=[True, False]) 185 | def as_array(request): 186 | """ 187 | Boolean fixture to support ExtensionDtype _from_sequence method testing. 188 | """ 189 | return request.param 190 | 191 | 192 | @pytest.fixture 193 | def invalid_scalar(data): 194 | """ 195 | A scalar that *cannot* be held by this ExtensionArray. 196 | 197 | The default should work for most subclasses, but is not guaranteed. 198 | 199 | If the array can hold any item (i.e. object dtype), then use pytest.skip. 200 | """ 201 | return object.__new__(object) 202 | 203 | 204 | @pytest.fixture 205 | def using_copy_on_write() -> bool: 206 | """ 207 | Fixture to check if Copy-on-Write is enabled. 
208 | """ 209 | return options.mode.copy_on_write and options.mode.data_manager == "block" 210 | 211 | 212 | class TestGeoArrowDtype(base.BaseDtypeTests): 213 | pass 214 | 215 | 216 | class TestGeoArrowConstructors(base.BaseConstructorsTests): 217 | pass 218 | 219 | 220 | class TestGeoArrowGetItem(base.BaseGetitemTests): 221 | pass 222 | 223 | 224 | class TestGeoArrowMissing(base.BaseMissingTests): 225 | def test_fillna_scalar(self, data_missing): 226 | pytest.skip() 227 | 228 | def test_fillna_frame(self, data_missing): 229 | pytest.skip() 230 | 231 | def test_fillna_series(self, data_missing): 232 | pytest.skip() 233 | 234 | 235 | class TestGeoArrowMethods(base.BaseMethodsTests): 236 | def test_value_counts(self, all_data): 237 | pytest.skip() 238 | 239 | def test_value_counts_with_normalize(self, data): 240 | pytest.skip() 241 | 242 | def test_factorize_empty(self, data): 243 | pytest.skip() 244 | 245 | def test_fillna_copy_frame(self, data_missing): 246 | pytest.skip() 247 | 248 | def test_fillna_copy_series(self, data_missing): 249 | pytest.skip() 250 | 251 | def test_combine_first(self, data): 252 | pytest.skip() 253 | 254 | def test_shift_0_periods(self, data): 255 | pytest.skip() 256 | 257 | def test_where_series(self, data, na_value, as_frame): 258 | pytest.skip() 259 | 260 | 261 | class TestGeoArrowIndex(base.BaseIndexTests): 262 | pass 263 | 264 | 265 | class TestGeoArrowInterface(base.BaseInterfaceTests): 266 | def test_copy(self, data): 267 | pytest.skip() 268 | 269 | def test_view(self, data): 270 | pytest.skip() 271 | 272 | def test_array_interface_copy(self, data): 273 | pytest.skip() 274 | 275 | 276 | class TestGeoArrowParsing(base.BaseParsingTests): 277 | pass 278 | 279 | 280 | class TestGeoArrowPrinting(base.BasePrintingTests): 281 | pass 282 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GeoArrow for Python 2 | 3 
| The GeoArrow Python packages provide an implementation of the [GeoArrow specification](https://geoarrow.org) that integrates with [pyarrow](https://arrow.apache.org/docs/python). The GeoArrow Python bindings enable input/output to/from Arrow-friendly formats (e.g., Parquet, Arrow Stream, Arrow File) and general-purpose coordinate shuffling tools among GeoArrow, WKT, and WKB encodings. 4 | 5 | ## Installation 6 | 7 | Python bindings for GeoArrow are available on PyPI. You can install them with: 8 | 9 | ```bash 10 | pip install geoarrow-pyarrow 11 | ``` 12 | 13 | You can install the latest development version with: 14 | 15 | ```bash 16 | pip install "git+https://github.com/geoarrow/geoarrow-python.git#subdirectory=geoarrow-pyarrow" 17 | ``` 18 | 19 | If you can import the namespace, you're good to go! 20 | 21 | 22 | ```python 23 | import geoarrow.pyarrow as ga 24 | ``` 25 | 26 | ## Example 27 | 28 | The most important thing that `geoarrow.pyarrow` does is register pyarrow extension types so that metadata is kept intact when reading files or interacting with other libraries. 
For example, we can now read Arrow IPC files written with GeoArrow extension types and the CRS and geometry type is kept: 29 | 30 | 31 | ```python 32 | import pyarrow as pa 33 | import urllib.request 34 | 35 | url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_wkb.arrows" 36 | with urllib.request.urlopen(url) as f, pa.ipc.open_stream(f) as reader: 37 | tab = reader.read_all() 38 | 39 | tab.schema.field("geometry").type 40 | ``` 41 | 42 | 43 | 44 | 45 | WkbType(geoarrow.wkb ) 46 | 47 | 48 | 49 | Use `geoarrow.pyarrow.to_geopandas()` to convert to [geopandas](https://geopandas.org): 50 | 51 | 52 | ```python 53 | df = ga.to_geopandas(tab) 54 | df.geometry.crs 55 | ``` 56 | 57 | 58 | 59 | 60 | 61 | Name: WGS 84 62 | Axis Info [ellipsoidal]: 63 | - Lat[north]: Geodetic latitude (degree) 64 | - Lon[east]: Geodetic longitude (degree) 65 | Area of Use: 66 | - name: World. 67 | - bounds: (-180.0, -90.0, 180.0, 90.0) 68 | Datum: World Geodetic System 1984 ensemble 69 | - Ellipsoid: WGS 84 70 | - Prime Meridian: Greenwich 71 | 72 | 73 | 74 | ...and use `GeoDataFrame.to_arrow()` to get it back: 75 | 76 | 77 | ```python 78 | pa.table(df.to_arrow())["geometry"].type.crs 79 | ``` 80 | 81 | 82 | 83 | 84 | ProjJsonCrs(EPSG:4326) 85 | 86 | 87 | 88 | These Python bindings also include [GeoParquet](https://geoparquet.org) and [pyogrio](https://github.com/geopandas/pyogrio) integration for direct IO to/from pyarrow. This can be useful when loading data approaching the size of available memory as GeoPandas requires many times more memory for some types of data (notably: large numbers of points). 
89 | 90 | 91 | ```python 92 | import geoarrow.pyarrow.io 93 | 94 | url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities.fgb" 95 | geoarrow.pyarrow.io.read_pyogrio_table(url) 96 | ``` 97 | 98 | 99 | 100 | 101 | pyarrow.Table 102 | name: string 103 | geometry: extension> 104 | ---- 105 | name: [["Vatican City","San Marino","Vaduz","Lobamba","Luxembourg",...,"Rio de Janeiro","Sao Paulo","Sydney","Singapore","Hong Kong"]] 106 | geometry: [[010100000054E57B4622E828408B074AC09EF34440,0101000000DCB122B42FE228402376B7FCD1F74540,01010000006DAE9AE78808234032D989DC1D914740,01010000007BCB8B0233333F40289B728577773AC0,0101000000C08D39741F8518400F2153E34ACE4840,...,0101000000667B47AA269B45C002B53F5745E836C0,0101000000F15A536A405047C0C1148A19868E37C0,0101000000A286FD30CDE662401F04CF2989EF40C0,01010000003A387DE2A5F659409AF3E7363CB8F43F,0101000000D865F84FB78B5C40144438C1924E3640]] 107 | 108 | 109 | 110 | 111 | ```python 112 | url = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" 113 | local_filename, _ = urllib.request.urlretrieve(url) 114 | 115 | geoarrow.pyarrow.io.read_geoparquet_table(local_filename) 116 | ``` 117 | 118 | 119 | 120 | 121 | pyarrow.Table 122 | name: string 123 | geometry: extension> 124 | ---- 125 | name: [["Vatican City","San Marino","Vaduz","Lobamba","Luxembourg",...,"Rio de Janeiro","Sao Paulo","Sydney","Singapore","Hong Kong"]] 126 | geometry: [[010100000054E57B4622E828408B074AC09EF34440,0101000000DCB122B42FE228402376B7FCD1F74540,01010000006DAE9AE78808234032D989DC1D914740,01010000007BCB8B0233333F40289B728577773AC0,0101000000C08D39741F8518400F2153E34ACE4840,...,0101000000667B47AA269B45C002B53F5745E836C0,0101000000F15A536A405047C0C1148A19868E37C0,0101000000A286FD30CDE662401F04CF2989EF40C0,01010000003A387DE2A5F659409AF3E7363CB8F43F,0101000000D865F84FB78B5C40144438C1924E3640]] 127 | 128 | 129 | 130 | Finally, a number of compute 
functions are provided for common transformations required to create/consume arrays of geometries: 131 | 132 | 133 | ```python 134 | ga.format_wkt(tab["geometry"])[:5] 135 | ``` 136 | 137 | 138 | 139 | 140 | 141 | [ 142 | [ 143 | "POINT (12.4533865 41.9032822)", 144 | "POINT (12.4417702 43.9360958)", 145 | "POINT (9.5166695 47.1337238)", 146 | "POINT (31.1999971 -26.4666675)", 147 | "POINT (6.1300028 49.6116604)" 148 | ] 149 | ] 150 | 151 | 152 | 153 | ## Create/Consume GeoArrow Arrays 154 | 155 | The `geoarrow-pyarrow` package also provides a number of utilities for working with serialized and GeoArrow-native arrays. For example, you can create geoarrow-encoded `pyarrow.Array`s with `as_geoarrow()`: 156 | 157 | 158 | ```python 159 | ga.as_geoarrow(["POINT (0 1)"]) 160 | ``` 161 | 162 | 163 | 164 | 165 | GeometryExtensionArray:PointType(geoarrow.point)[1] 166 | 167 | 168 | 169 | 170 | This will work with: 171 | 172 | - An existing array created by geoarrow 173 | - A `geopandas.GeoSeries` 174 | - A `pyarrow.Array` or `pyarrow.ChunkedArray` (geoarrow text interpreted as well-known text; binary interpreted as well-known binary) 175 | - Anything that `pyarrow.array()` will convert to a text or binary array 176 | 177 | If there is no common geometry type among elements of the input, `as_geoarrow()` will fall back to well-known binary encoding. To explicitly convert to well-known text or binary, use `as_wkt()` or `as_wkb()`. 
178 | 179 | Alternatively, you can construct GeoArrow arrays directly from a series of buffers as described in the specification: 180 | 181 | 182 | ```python 183 | import numpy as np 184 | 185 | ga.point().from_geobuffers( 186 | None, 187 | np.array([1.0, 2.0, 3.0]), 188 | np.array([3.0, 4.0, 5.0]) 189 | ) 190 | ``` 191 | 192 | 193 | 194 | 195 | GeometryExtensionArray:PointType(geoarrow.point)[3] 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | ```python 204 | ga.point().with_coord_type(ga.CoordType.INTERLEAVED).from_geobuffers( 205 | None, 206 | np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) 207 | ) 208 | ``` 209 | 210 | 211 | 212 | 213 | GeometryExtensionArray:PointType(interleaved geoarrow.point)[3] 214 | 215 | 216 | 217 | 218 | 219 | 220 | ## For Developers 221 | 222 | One of the challeneges with GeoArrow data is the large number of permutations between X, Y, Z, M, geometry types, and serialized encodings. The `geoarrow-types` package provides pure Python utilities to manage, compute on, and specify these types (or parts of them, as required). 223 | 224 | 225 | ```python 226 | import geoarrow.types as gt 227 | 228 | gt.TypeSpec.common( 229 | gt.Encoding.GEOARROW, 230 | gt.GeometryType.POINT, 231 | gt.GeometryType.MULTIPOINT, 232 | gt.Dimensions.XYM, 233 | gt.Dimensions.XYZ, 234 | ).to_pyarrow() 235 | ``` 236 | 237 | 238 | 239 | 240 | MultiPointType(geoarrow.multipoint_zm) 241 | 242 | 243 | 244 | ## Building 245 | 246 | Python bindings for geoarrow are managed with [setuptools](https://setuptools.pypa.io/en/latest/index.html). 
247 | This means you can build the project using: 248 | 249 | ```shell 250 | git clone https://github.com/geoarrow/geoarrow-python.git 251 | pip install -e geoarrow-pyarrow/ geoarrow-types/ 252 | ``` 253 | 254 | Tests use [pytest](https://docs.pytest.org/): 255 | 256 | ```shell 257 | pytest 258 | ``` 259 | -------------------------------------------------------------------------------- /geoarrow-types/src/geoarrow/types/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from functools import reduce 3 | 4 | 5 | class TypeSpecEnum(Enum): 6 | def is_specified(self): 7 | return self is self.UNSPECIFIED 8 | 9 | @classmethod 10 | def create(cls, obj): 11 | if isinstance(obj, cls): 12 | return obj 13 | elif obj is None: 14 | return cls.UNSPECIFIED 15 | elif isinstance(obj, str): 16 | return cls[obj.upper()] 17 | else: 18 | raise TypeError( 19 | f"Can't create {cls.__name__} from object of type {type(obj).__name__}" 20 | ) 21 | 22 | @classmethod 23 | def coalesce(cls, *args): 24 | return reduce(cls._coalesce2, args, cls.UNSPECIFIED) 25 | 26 | @classmethod 27 | def coalesce_unspecified(cls, *args): 28 | return reduce(cls._coalesce_unspecified2, args, cls.UNSPECIFIED) 29 | 30 | @classmethod 31 | def common(cls, *args): 32 | return reduce(cls._common2, args, cls.UNSPECIFIED) 33 | 34 | @classmethod 35 | def _coalesce2(cls, value, default): 36 | if value == cls.UNSPECIFIED: 37 | return default 38 | else: 39 | return value 40 | 41 | @classmethod 42 | def _coalesce_unspecified2(cls, lhs, rhs): 43 | if lhs == rhs: 44 | return lhs 45 | elif lhs == cls.UNSPECIFIED: 46 | return rhs 47 | elif rhs == cls.UNSPECIFIED: 48 | return lhs 49 | else: 50 | raise ValueError(f"{cls.__name__} is overspecified ({lhs} and {rhs})") 51 | 52 | @classmethod 53 | def _common2(cls, lhs, rhs): 54 | if lhs == cls.UNSPECIFIED: 55 | return rhs 56 | elif rhs == cls.UNSPECIFIED: 57 | return lhs 58 | elif lhs == rhs: 59 | return lhs 60 | 
elif (lhs, rhs) in _VALUE_COMMON_HELPER: 61 | return _VALUE_COMMON_HELPER[(lhs, rhs)] 62 | elif (rhs, lhs) in _VALUE_COMMON_HELPER: 63 | return _VALUE_COMMON_HELPER[(rhs, lhs)] 64 | else: 65 | return None 66 | 67 | 68 | class Encoding(TypeSpecEnum): 69 | """Constants for encoding type. 70 | 71 | Examples 72 | -------- 73 | 74 | >>> from geoarrow import types 75 | >>> types.Encoding.GEOARROW 76 | 77 | """ 78 | 79 | UNSPECIFIED = 0 80 | """Unknown or uninitialized encoding""" 81 | 82 | WKB = 1 83 | """Well-known binary encoding with a maximum of 2GB of data per array chunk""" 84 | 85 | LARGE_WKB = 2 86 | """Well-known binary encoding""" 87 | 88 | WKT = 3 89 | """Well-known text encoding with a maximum of 2GB of data per array chunk""" 90 | 91 | LARGE_WKT = 4 92 | """Well-known text encoding with 64-bit offsets""" 93 | 94 | WKB_VIEW = 5 95 | """Well-known binary encoding using binary views as a storage type""" 96 | 97 | WKT_VIEW = 6 98 | """Well-known binary encoding using string views as a storage type""" 99 | 100 | GEOARROW = 7 101 | """GeoArrow native nested list encoding""" 102 | 103 | def is_serialized(self): 104 | return self in ( 105 | Encoding.WKB, 106 | Encoding.LARGE_WKB, 107 | Encoding.WKT, 108 | Encoding.LARGE_WKT, 109 | Encoding.WKB_VIEW, 110 | Encoding.WKT_VIEW, 111 | ) 112 | 113 | 114 | class GeometryType(TypeSpecEnum): 115 | """Constants for geometry type. These values are the same as those used 116 | in well-known binary (i.e, 0-7). 
117 | 118 | Examples 119 | -------- 120 | 121 | >>> from geoarrow import types 122 | >>> types.GeometryType.MULTIPOINT 123 | 124 | """ 125 | 126 | UNSPECIFIED = -1 127 | """Unspecified geometry type""" 128 | 129 | GEOMETRY = 0 130 | """Unknown or mixed geometry type""" 131 | 132 | POINT = 1 133 | """Point geometry type""" 134 | 135 | LINESTRING = 2 136 | """Linestring geometry type""" 137 | 138 | POLYGON = 3 139 | """Polygon geometry type""" 140 | 141 | MULTIPOINT = 4 142 | """Multipoint geometry type""" 143 | 144 | MULTILINESTRING = 5 145 | """Multilinestring geometry type""" 146 | 147 | MULTIPOLYGON = 6 148 | """Multipolygon geometry type""" 149 | 150 | GEOMETRYCOLLECTION = 7 151 | """Geometry collection geometry type""" 152 | 153 | BOX = 990 154 | """Box geometry type""" 155 | 156 | @classmethod 157 | def _common2(cls, lhs, rhs): 158 | out = super()._common2(lhs, rhs) 159 | if out is not None: 160 | return out 161 | else: 162 | return cls.GEOMETRY 163 | 164 | 165 | class Dimensions(TypeSpecEnum): 166 | """Constants for dimensions. 167 | 168 | Examples 169 | -------- 170 | 171 | >>> from geoarrow import types 172 | >>> types.Dimensions.XYZM 173 | 174 | """ 175 | 176 | UNSPECIFIED = -1 177 | """Unspecified dimensions""" 178 | 179 | UNKNOWN = 0 180 | """Unknown or mixed dimensions""" 181 | 182 | XY = 1 183 | """XY dimensions""" 184 | 185 | XYZ = 2 186 | """XYZ dimensions""" 187 | 188 | XYM = 3 189 | """XYM dimensions""" 190 | 191 | XYZM = 4 192 | """XYZM dimensions""" 193 | 194 | def count(self): 195 | if self in (Dimensions.UNSPECIFIED, Dimensions.UNKNOWN): 196 | return 0 197 | else: 198 | return len(self.name) 199 | 200 | @classmethod 201 | def _common2(cls, lhs, rhs): 202 | out = super()._common2(lhs, rhs) 203 | if out is not None: 204 | return out 205 | else: 206 | return cls.UNKNOWN 207 | 208 | 209 | class CoordType(TypeSpecEnum): 210 | """Constants for coordinate type. 
211 | 212 | Examples 213 | -------- 214 | 215 | >>> from geoarrow import types 216 | >>> types.CoordType.INTERLEAVED 217 | 218 | """ 219 | 220 | UNSPECIFIED = 0 221 | """"Unknown or uninitialized coordinate type""" 222 | 223 | SEPARATED = 1 224 | """Coordinate type composed of separate arrays for each dimension 225 | (i.e., a struct) 226 | """ 227 | 228 | INTERLEAVED = 2 229 | """Coordinate type composed of a single array containing all dimensions 230 | (i.e., a fixed-size list) 231 | """ 232 | 233 | 234 | class EdgeType(TypeSpecEnum): 235 | """Constants for edge type. 236 | 237 | Examples 238 | -------- 239 | 240 | >>> from geoarrow import types 241 | >>> types.EdgeType.SPHERICAL 242 | 243 | """ 244 | 245 | UNSPECIFIED = 0 246 | """Unknown or ininitialized edge type""" 247 | 248 | PLANAR = 1 249 | """Edges form a Cartesian line on a plane""" 250 | 251 | SPHERICAL = 2 252 | """Edges are geodesic on a sphere""" 253 | 254 | VINCENTY = 3 255 | """Edges are geodesic on a spheroid according to the Vincenty algorithm""" 256 | 257 | THOMAS = 4 258 | """Edges are geodesic on a spheroid according to the Thomas algorithm""" 259 | 260 | ANDOYER = 5 261 | """Edges are geodesic on a spheroid according to the Andoyer algorithm""" 262 | 263 | KARNEY = 6 264 | """Edges are geodesic on a spheroid according to the Karney algorithm""" 265 | 266 | 267 | _VALUE_COMMON_HELPER = { 268 | (Encoding.WKB, Encoding.LARGE_WKB): Encoding.LARGE_WKB, 269 | (Encoding.WKB, Encoding.WKT): Encoding.WKB, 270 | (Encoding.WKB, Encoding.LARGE_WKT): Encoding.LARGE_WKB, 271 | (Encoding.WKB, Encoding.WKB_VIEW): Encoding.WKB_VIEW, 272 | (Encoding.WKB, Encoding.GEOARROW): Encoding.WKB, 273 | (Encoding.WKB_VIEW, Encoding.LARGE_WKB): Encoding.WKB_VIEW, 274 | (Encoding.WKT, Encoding.LARGE_WKT): Encoding.LARGE_WKT, 275 | (Encoding.WKT, Encoding.LARGE_WKB): Encoding.LARGE_WKB, 276 | (Encoding.WKT, Encoding.WKT_VIEW): Encoding.WKT_VIEW, 277 | (Encoding.WKT, Encoding.GEOARROW): Encoding.WKB, 278 | 
(Encoding.WKT_VIEW, Encoding.LARGE_WKT): Encoding.WKT_VIEW, 279 | (GeometryType.POINT, GeometryType.MULTIPOINT): GeometryType.MULTIPOINT, 280 | ( 281 | GeometryType.LINESTRING, 282 | GeometryType.MULTILINESTRING, 283 | ): GeometryType.MULTILINESTRING, 284 | (GeometryType.POLYGON, GeometryType.MULTIPOLYGON): GeometryType.MULTIPOLYGON, 285 | (Dimensions.XY, Dimensions.XYZ): Dimensions.XYZ, 286 | (Dimensions.XY, Dimensions.XYM): Dimensions.XYM, 287 | (Dimensions.XY, Dimensions.XYZM): Dimensions.XYZM, 288 | (Dimensions.XYZ, Dimensions.XYM): Dimensions.XYZM, 289 | (Dimensions.XYM, Dimensions.XYZM): Dimensions.XYZM, 290 | } 291 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | from tempfile import TemporaryDirectory 2 | 3 | import pyarrow as pa 4 | import pyarrow.dataset as ds 5 | import pyarrow.parquet as pq 6 | import pytest 7 | 8 | import geoarrow.pyarrow as ga 9 | import geoarrow.pyarrow.dataset as gads 10 | 11 | 12 | def test_geodataset_column_name_guessing(): 13 | table = pa.table([ga.array(["POINT (0.5 1.5)"])], ["geometry"]) 14 | geods = gads.dataset(table) 15 | assert geods.geometry_columns == ("geometry",) 16 | 17 | 18 | def test_geodataset_column_type_guessing(): 19 | # Already a geoarrow type 20 | table = pa.table([ga.array(["POINT (0.5 1.5)"])], ["geometry"]) 21 | geods = gads.dataset(table, geometry_columns=["geometry"]) 22 | assert geods.geometry_types == (ga.wkt(),) 23 | 24 | # utf8 maps to wkt 25 | table = pa.table([ga.array(["POINT (0.5 1.5)"]).storage], ["geometry"]) 26 | geods = gads.dataset(table, geometry_columns=["geometry"]) 27 | assert geods.geometry_types == (ga.wkt(),) 28 | 29 | # binary maps to wkb 30 | table = pa.table([ga.as_wkb(["POINT (0.5 1.5)"]).storage], ["geometry"]) 31 | geods = gads.dataset(table, geometry_columns=["geometry"]) 32 | assert geods.geometry_types == (ga.wkb(),) 33 
| 34 | # Error for other types 35 | with pytest.raises(TypeError): 36 | table = pa.table([[123]], ["geometry"]) 37 | geods = gads.dataset(table, geometry_columns=["geometry"]) 38 | geods.geometry_types 39 | 40 | 41 | def test_geodataset_in_memory(): 42 | table1 = pa.table([ga.array(["POINT (0.5 1.5)"])], ["geometry"]) 43 | table2 = pa.table([ga.array(["POINT (2.5 3.5)"])], ["geometry"]) 44 | 45 | geods = gads.dataset([table1, table2]) 46 | assert isinstance(geods._parent, ds.InMemoryDataset) 47 | assert len(list(geods._parent.get_fragments())) == 2 48 | 49 | filtered1 = geods.filter_fragments("POLYGON ((2 3, 3 3, 3 4, 2 4, 2 3))") 50 | assert isinstance(filtered1, gads.GeoDataset) 51 | assert filtered1.to_table().num_rows == 1 52 | assert filtered1._index.column("_fragment_index") == pa.chunked_array([[0]]) 53 | assert filtered1._index.column("geometry") == geods._index.column("geometry").take( 54 | [1] 55 | ) 56 | 57 | # Make sure we can filter to empty 58 | filtered0 = geods.filter_fragments("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))") 59 | assert filtered0.to_table().num_rows == 0 60 | 61 | with pytest.raises(TypeError): 62 | gads.dataset([table1], use_row_groups=True) 63 | 64 | 65 | def test_geodataset_in_memory_guessed_type(): 66 | table1 = pa.table([ga.array(["POINT (0.5 1.5)"]).storage], ["geometry"]) 67 | table2 = pa.table([ga.array(["POINT (2.5 3.5)"]).storage], ["geometry"]) 68 | geods = gads.dataset([table1, table2], geometry_columns=["geometry"]) 69 | 70 | filtered1 = geods.filter_fragments("POLYGON ((2 3, 3 3, 3 4, 2 4, 2 3))") 71 | assert filtered1.to_table().num_rows == 1 72 | 73 | 74 | def test_geodataset_multiple_geometry_columns(): 75 | table1 = pa.table( 76 | [ga.array(["POINT (0.5 1.5)"]), ga.array(["POINT (2.5 3.5)"])], 77 | ["geometry1", "geometry2"], 78 | ) 79 | table2 = pa.table( 80 | [ga.array(["POINT (4.5 5.5)"]), ga.array(["POINT (6.5 7.5)"])], 81 | ["geometry1", "geometry2"], 82 | ) 83 | 84 | geods = gads.dataset([table1, table2]) 85 | assert 
isinstance(geods._parent, ds.InMemoryDataset) 86 | assert len(list(geods._parent.get_fragments())) == 2 87 | 88 | filtered1 = geods.filter_fragments("POLYGON ((0 1, 1 1, 1 2, 0 2, 0 1))").to_table() 89 | assert filtered1.num_rows == 1 90 | 91 | filtered2 = geods.filter_fragments("POLYGON ((2 3, 3 3, 3 4, 2 4, 2 3))").to_table() 92 | assert filtered2.num_rows == 1 93 | 94 | 95 | def test_geodataset_parquet(): 96 | table1 = pa.table([ga.array(["POINT (0.5 1.5)"])], ["geometry"]) 97 | table2 = pa.table([ga.array(["POINT (2.5 3.5)"])], ["geometry"]) 98 | with TemporaryDirectory() as td: 99 | pq.write_table(table1, f"{td}/table1.parquet") 100 | pq.write_table(table2, f"{td}/table2.parquet") 101 | geods = gads.dataset( 102 | [f"{td}/table1.parquet", f"{td}/table2.parquet"], use_row_groups=False 103 | ) 104 | 105 | filtered1 = geods.filter_fragments( 106 | "POLYGON ((0 1, 1 1, 1 2, 0 2, 0 1))" 107 | ).to_table() 108 | assert filtered1.num_rows == 1 109 | 110 | 111 | def test_geodataset_parquet_rowgroups(): 112 | table = pa.table([ga.array(["POINT (0.5 1.5)", "POINT (2.5 3.5)"])], ["geometry"]) 113 | with TemporaryDirectory() as td: 114 | pq.write_table(table, f"{td}/table.parquet", row_group_size=1) 115 | 116 | geods = gads.dataset(f"{td}/table.parquet") 117 | assert isinstance(geods, gads.ParquetRowGroupGeoDataset) 118 | assert len(geods.get_fragments()) == 2 119 | 120 | filtered1 = geods.filter_fragments("POLYGON ((2 3, 3 3, 3 4, 2 4, 2 3))") 121 | assert isinstance(filtered1, gads.ParquetRowGroupGeoDataset) 122 | assert filtered1.to_table().num_rows == 1 123 | assert filtered1._index.column("_fragment_index") == pa.chunked_array([[0]]) 124 | assert filtered1._index.column("geometry") == geods._index.column( 125 | "geometry" 126 | ).take([1]) 127 | 128 | assert filtered1._row_group_ids == [1] 129 | 130 | 131 | def test_geodataset_parquet_index_rowgroups(): 132 | array_wkt = ga.array( 133 | ["LINESTRING (0.5 1.5, 2.5 3.5)", "LINESTRING (4.5 5.5, 6.5 7.5)"] 134 | ) 135 | 
array_geoarrow = ga.as_geoarrow( 136 | ["LINESTRING (8.5 9.5, 10.5 11.5)", "LINESTRING (12.5 13.5, 14.5 15.5)"] 137 | ) 138 | 139 | table_wkt = pa.table([array_wkt], ["geometry"]) 140 | table_geoarrow = pa.table([array_geoarrow], ["geometry"]) 141 | table_both = pa.table( 142 | [array_wkt, array_geoarrow], ["geometry_wkt", "geometry_geoarrow"] 143 | ) 144 | 145 | with TemporaryDirectory() as td: 146 | pq.write_table(table_wkt, f"{td}/table_wkt.parquet", row_group_size=1) 147 | pq.write_table(table_geoarrow, f"{td}/table_geoarrow.parquet", row_group_size=1) 148 | pq.write_table( 149 | table_geoarrow, 150 | f"{td}/table_geoarrow_nostats.parquet", 151 | row_group_size=1, 152 | write_statistics=False, 153 | ) 154 | pq.write_table(table_both, f"{td}/table_both.parquet", row_group_size=1) 155 | 156 | ds_wkt = gads.dataset(f"{td}/table_wkt.parquet") 157 | ds_geoarrow = gads.dataset(f"{td}/table_geoarrow.parquet") 158 | ds_geoarrow_nostats = gads.dataset(f"{td}/table_geoarrow_nostats.parquet") 159 | ds_both = gads.dataset(f"{td}/table_both.parquet") 160 | 161 | index_wkt = ds_wkt.index_fragments() 162 | index_geoarrow = ds_geoarrow.index_fragments() 163 | index_geoarrow_nostats = ds_geoarrow_nostats.index_fragments() 164 | index_both = ds_both.index_fragments() 165 | 166 | # All the fragment indices should be the same 167 | assert index_geoarrow.column(0) == index_wkt.column(0) 168 | assert index_geoarrow_nostats.column(0) == index_wkt.column(0) 169 | assert index_both.column(0) == index_wkt.column(0) 170 | 171 | # The wkt index should be the same in index_both and index_wkt 172 | assert index_both.column("geometry_wkt") == index_wkt.column("geometry") 173 | 174 | # The geoarrow index should be the same everywhere 175 | assert index_geoarrow_nostats.column("geometry") == index_geoarrow.column( 176 | "geometry" 177 | ) 178 | assert index_both.column("geometry_geoarrow") == index_geoarrow.column( 179 | "geometry" 180 | ) 181 | 182 | 183 | def 
test_geodataset_parquet_filter_rowgroups_with_stats(): 184 | arr = ga.as_geoarrow(["POINT (0.5 1.5)", "POINT (2.5 3.5)"]) 185 | table = pa.table([arr], ["geometry"]) 186 | with TemporaryDirectory() as td: 187 | pq.write_table(table, f"{td}/table.parquet", row_group_size=1) 188 | 189 | geods = gads.dataset(f"{td}/table.parquet") 190 | assert len(geods.get_fragments()) == 2 191 | 192 | geods._build_index_using_stats(["geometry"]) 193 | 194 | filtered1 = geods.filter_fragments( 195 | "POLYGON ((0 1, 1 1, 1 2, 0 2, 0 1))" 196 | ).to_table() 197 | assert filtered1.num_rows == 1 198 | 199 | 200 | def test_parquet_fields_before(): 201 | schema = pa.schema([pa.field("col1", pa.int32()), pa.field("col2", pa.int32())]) 202 | fields_before = gads.ParquetRowGroupGeoDataset._count_fields_before(schema) 203 | assert fields_before == [(("col1",), 0), (("col2",), 1)] 204 | 205 | schema = pa.schema( 206 | [pa.field("col1", pa.list_(pa.int32())), pa.field("col2", pa.int32())] 207 | ) 208 | fields_before = gads.ParquetRowGroupGeoDataset._count_fields_before(schema) 209 | assert fields_before == [(("col1",), 0), (("col1", "item"), 0), (("col2",), 1)] 210 | 211 | schema = pa.schema( 212 | [pa.field("col1", ga.linestring()), pa.field("col2", pa.int32())] 213 | ) 214 | fields_before = gads.ParquetRowGroupGeoDataset._count_fields_before(schema) 215 | assert fields_before == [ 216 | (("col1",), 0), 217 | (("col1", "vertices"), 0), 218 | (("col1", "vertices", "x"), 0), 219 | (("col1", "vertices", "y"), 1), 220 | (("col2",), 2), 221 | ] 222 | -------------------------------------------------------------------------------- /geoarrow-pandas/tests/test_geoarrow_pandas.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | 5 | import pandas as pd 6 | import pyarrow as pa 7 | import geoarrow.pandas as gapd 8 | import geoarrow.pyarrow as ga 9 | import numpy as np 10 | 11 | 12 | def test_version(): 13 | assert 
re.match(r"^[0-9]+\.[0-9]+", gapd.__version__) 14 | 15 | 16 | def test_dtype_constructor(): 17 | from_pyarrow = gapd.GeoArrowExtensionDtype(ga.point()) 18 | assert from_pyarrow.name == "geoarrow.point" 19 | 20 | from_spec = gapd.GeoArrowExtensionDtype(ga.point().spec) 21 | assert from_spec.name == "geoarrow.point" 22 | 23 | from_dtype = gapd.GeoArrowExtensionDtype(from_spec) 24 | assert from_dtype.name == "geoarrow.point" 25 | 26 | with pytest.raises(TypeError): 27 | gapd.GeoArrowExtensionDtype(b"1234") 28 | 29 | 30 | def test_dtype_strings(): 31 | dtype = gapd.GeoArrowExtensionDtype(ga.point()) 32 | assert str(dtype) == "geoarrow.point" 33 | dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype)) 34 | assert dtype2 == dtype 35 | 36 | dtype = gapd.GeoArrowExtensionDtype(ga.point().with_crs(ga.OGC_CRS84)) 37 | assert ( 38 | str(dtype) 39 | == 'geoarrow.point{"crs": ' 40 | + ga.OGC_CRS84.to_json() 41 | + ', "crs_type": "projjson"}' 42 | ) 43 | dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype)) 44 | assert dtype2 == dtype 45 | 46 | dtype = gapd.GeoArrowExtensionDtype( 47 | ga.point().with_coord_type(ga.CoordType.INTERLEAVED) 48 | ) 49 | assert str(dtype) == "geoarrow.point[interleaved]" 50 | dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype)) 51 | assert dtype2 == dtype 52 | 53 | dtype = gapd.GeoArrowExtensionDtype(ga.point().with_dimensions(ga.Dimensions.XYZ)) 54 | assert str(dtype) == "geoarrow.point[z]" 55 | dtype2 = gapd.GeoArrowExtensionDtype.construct_from_string(str(dtype)) 56 | assert dtype2 == dtype 57 | 58 | 59 | def test_scalar(): 60 | scalar_from_wkt = gapd.GeoArrowExtensionScalar("POINT (0 1)") 61 | assert scalar_from_wkt.wkt == "POINT (0 1)" 62 | assert isinstance(scalar_from_wkt.wkb, bytes) 63 | assert str(scalar_from_wkt) == "POINT (0 1)" 64 | assert repr(scalar_from_wkt) == 'GeoArrowExtensionScalar("POINT (0 1)")' 65 | 66 | scalar_from_wkb = gapd.GeoArrowExtensionScalar(scalar_from_wkt.wkb) 67 | 
assert scalar_from_wkb == scalar_from_wkt 68 | 69 | scalar_from_scalar = gapd.GeoArrowExtensionScalar(scalar_from_wkt) 70 | assert scalar_from_scalar == scalar_from_wkt 71 | 72 | array = ga.as_geoarrow(["POINT (0 1)", "POINT (1 2)"]) 73 | scalar_from_array0 = gapd.GeoArrowExtensionScalar(array, 0) 74 | assert scalar_from_array0 == scalar_from_wkt 75 | 76 | scalar_from_array1 = gapd.GeoArrowExtensionScalar(array, 1) 77 | assert scalar_from_array1 == gapd.GeoArrowExtensionScalar("POINT (1 2)") 78 | 79 | 80 | def test_array_init_without_type(): 81 | array = gapd.GeoArrowExtensionArray(["POINT (0 1)"]) 82 | assert array._data == ga.array(["POINT (0 1)"]) 83 | assert array._dtype._parent.extension_name == "geoarrow.wkt" 84 | 85 | 86 | def test_array_init_with_type(): 87 | array = gapd.GeoArrowExtensionArray(["POINT (0 1)"], ga.wkt()) 88 | assert array._data == ga.array(["POINT (0 1)"], ga.wkt()) 89 | assert array._dtype._parent.extension_name == "geoarrow.wkt" 90 | 91 | 92 | def test_array_basic_methods(): 93 | pa_array = ga.array(["POINT (0 1)", "POINT (1 2)", None]) 94 | array = gapd.GeoArrowExtensionArray(pa_array) 95 | 96 | assert array[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)") 97 | assert array[2] is None 98 | assert isinstance(array[1:2], gapd.GeoArrowExtensionArray) 99 | assert len(array[1:2]) == 1 100 | assert array[1:2][0] == gapd.GeoArrowExtensionScalar("POINT (1 2)") 101 | assert isinstance(array[[1]], gapd.GeoArrowExtensionArray) 102 | assert array[[1]][0] == gapd.GeoArrowExtensionScalar("POINT (1 2)") 103 | 104 | assert len(array) == 3 105 | assert all(array[:2] == array[:2]) 106 | assert array.dtype == gapd.GeoArrowExtensionDtype(ga.wkt()) 107 | assert array.nbytes == pa_array.nbytes 108 | assert isinstance(array.take(np.array([1])), gapd.GeoArrowExtensionArray) 109 | assert array.take(np.array([1]))[0] == gapd.GeoArrowExtensionScalar("POINT (1 2)") 110 | np.testing.assert_array_equal(array.isna(), np.array([False, False, True])) 111 | 112 | assert 
isinstance(array.copy(), gapd.GeoArrowExtensionArray) 113 | assert array.copy()[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)") 114 | 115 | np.testing.assert_array_equal( 116 | array.to_numpy(), 117 | np.array( 118 | [ 119 | gapd.GeoArrowExtensionScalar("POINT (0 1)"), 120 | gapd.GeoArrowExtensionScalar("POINT (1 2)"), 121 | None, 122 | ] 123 | ), 124 | ) 125 | 126 | 127 | def test_array_basic_methods_chunked_data(): 128 | pa_array = ga.array(["POINT (0 1)", "POINT (1 2)", None]) 129 | array = gapd.GeoArrowExtensionArray(pa.chunked_array([pa_array])) 130 | 131 | assert array[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)") 132 | assert array[2] is None 133 | assert isinstance(array[1:2], gapd.GeoArrowExtensionArray) 134 | assert len(array[1:2]) == 1 135 | assert array[1:2][0] == gapd.GeoArrowExtensionScalar("POINT (1 2)") 136 | assert isinstance(array[[1]], gapd.GeoArrowExtensionArray) 137 | assert array[[1]][0] == gapd.GeoArrowExtensionScalar("POINT (1 2)") 138 | 139 | assert len(array) == 3 140 | assert all(array[:2] == array[:2]) 141 | assert array.dtype == gapd.GeoArrowExtensionDtype(ga.wkt()) 142 | assert array.nbytes == pa_array.nbytes 143 | assert isinstance(array.take(np.array([1])), gapd.GeoArrowExtensionArray) 144 | assert array.take(np.array([1]))[0] == gapd.GeoArrowExtensionScalar("POINT (1 2)") 145 | np.testing.assert_array_equal(array.isna(), np.array([False, False, True])) 146 | 147 | assert isinstance(array.copy(), gapd.GeoArrowExtensionArray) 148 | assert array.copy()[0] == gapd.GeoArrowExtensionScalar("POINT (0 1)") 149 | 150 | np.testing.assert_array_equal( 151 | array.to_numpy(), 152 | np.array( 153 | [ 154 | gapd.GeoArrowExtensionScalar("POINT (0 1)"), 155 | gapd.GeoArrowExtensionScalar("POINT (1 2)"), 156 | None, 157 | ] 158 | ), 159 | ) 160 | 161 | 162 | def test_array_concat(): 163 | pa_array_wkt = ga.array(["POINT (0 1)", "POINT (1 2)", None]) 164 | array_wkt = gapd.GeoArrowExtensionArray(pa_array_wkt) 165 | array_wkt_chunkned = 
gapd.GeoArrowExtensionArray(pa.chunked_array([array_wkt])) 166 | pa_array_geoarrow = ga.as_geoarrow(pa_array_wkt) 167 | array_geoarrow = gapd.GeoArrowExtensionArray(pa_array_geoarrow) 168 | 169 | concatenated0 = gapd.GeoArrowExtensionArray._concat_same_type([]) 170 | assert concatenated0.dtype == gapd.GeoArrowExtensionDtype(ga.wkb()) 171 | assert len(concatenated0) == 0 172 | 173 | concatenated1 = gapd.GeoArrowExtensionArray._concat_same_type([array_wkt]) 174 | assert concatenated1 is array_wkt 175 | 176 | concatenated_same_type = gapd.GeoArrowExtensionArray._concat_same_type( 177 | [array_wkt, array_wkt_chunkned] 178 | ) 179 | assert concatenated_same_type.dtype == array_wkt.dtype 180 | assert len(concatenated_same_type) == 6 181 | 182 | concatenated_diff_type = gapd.GeoArrowExtensionArray._concat_same_type( 183 | [array_wkt, array_geoarrow] 184 | ) 185 | assert concatenated_diff_type.dtype == gapd.GeoArrowExtensionDtype(ga.wkb()) 186 | assert len(concatenated_diff_type) == 6 187 | 188 | 189 | def test_accessor_parse_all(): 190 | series = pd.Series(["POINT (0 1)"]) 191 | assert series.geoarrow.parse_all() is series 192 | with pytest.raises(Exception, match="Expected geometry type at byte 0"): 193 | pd.Series(["NOT WKT"]).geoarrow.parse_all() 194 | 195 | 196 | def test_accessor_as_wkt(): 197 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.as_wkt() 198 | assert isinstance(ga_series.dtype.pyarrow_dtype, ga.WktType) 199 | 200 | 201 | def test_accessor_as_wkb(): 202 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.as_wkb() 203 | assert isinstance(ga_series.dtype.pyarrow_dtype, ga.WkbType) 204 | 205 | 206 | def test_accessor_format_wkt(): 207 | with pytest.raises(TypeError): 208 | pd.Series(["POINT (0 1)"]).geoarrow.format_wkt() 209 | 210 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.as_geoarrow().geoarrow.format_wkt() 211 | assert ga_series.dtype.pyarrow_dtype == pa.utf8() 212 | 213 | 214 | def test_accessor_format_wkb(): 215 | with pytest.raises(TypeError): 216 
| pd.Series(["POINT (0 1)"]).geoarrow.format_wkb() 217 | 218 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.as_geoarrow().geoarrow.format_wkb() 219 | assert ga_series.dtype.pyarrow_dtype == pa.binary() 220 | 221 | # Currently handles ChunkedArray explicitly 222 | chunked = pa.chunked_array([ga.array(["POINT (0 1)"])]) 223 | ga_series = chunked.to_pandas().geoarrow.format_wkb() 224 | assert ga_series.dtype.pyarrow_dtype == pa.binary() 225 | 226 | 227 | def test_accessor_as_geoarrow(): 228 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.as_geoarrow() 229 | assert isinstance(ga_series.dtype.pyarrow_dtype, ga.PointType) 230 | 231 | 232 | def test_accessor_bounds(): 233 | df = pd.Series(["POINT (0 1)"]).geoarrow.bounds() 234 | assert isinstance(df, pd.DataFrame) 235 | assert df.xmin[0] == 0 236 | assert df.ymin[0] == 1 237 | assert df.xmax[0] == 0 238 | assert df.ymax[0] == 1 239 | 240 | 241 | def test_accessor_total_bounds(): 242 | df = pd.Series(["POINT (0 1)"]).geoarrow.total_bounds() 243 | assert isinstance(df, pd.DataFrame) 244 | assert df.xmin[0] == 0 245 | assert df.ymin[0] == 1 246 | assert df.xmax[0] == 0 247 | assert df.ymax[0] == 1 248 | 249 | 250 | def test_accessor_point_coords(): 251 | series = pd.Series(["POINT (0 1)", "POINT (1 2)"]) 252 | x, y = series.geoarrow.point_coords() 253 | np.testing.assert_array_equal(np.array(x), np.array([0.0, 1.0])) 254 | np.testing.assert_array_equal(np.array(y), np.array([1.0, 2.0])) 255 | 256 | 257 | def test_accessor_with_coord_type(): 258 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_coord_type( 259 | ga.CoordType.INTERLEAVED 260 | ) 261 | assert ga_series.dtype.pyarrow_dtype.coord_type == ga.CoordType.INTERLEAVED 262 | 263 | 264 | def test_accessor_with_edge_type(): 265 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_edge_type( 266 | ga.EdgeType.SPHERICAL 267 | ) 268 | assert ga_series.dtype.pyarrow_dtype.edge_type == ga.EdgeType.SPHERICAL 269 | 270 | 271 | def test_accessor_with_crs(): 272 | 
ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_crs(ga.OGC_CRS84) 273 | assert ga_series.dtype.pyarrow_dtype.crs == ga.OGC_CRS84 274 | 275 | 276 | def test_accessor_with_dimensions(): 277 | ga_series = pd.Series(["POINT (0 1)"]).geoarrow.with_dimensions(ga.Dimensions.XYZ) 278 | assert ga_series.dtype.pyarrow_dtype.dimensions == ga.Dimensions.XYZ 279 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /geoarrow-types/src/geoarrow/types/crs.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import json 3 | from typing import Union, Mapping, Optional 4 | 5 | try: 6 | from typing import Protocol 7 | except ImportError: 8 | 9 | class Protocol: 10 | pass 11 | 12 | 13 | class Crs(Protocol): 14 | """Coordinate reference system protocol 15 | 16 | Defines an protocol with the methods required by GeoArrow types 17 | to consume a coordinate reference system. This is a subset of the 18 | methods available from a ``pyproj.CRS`` such that a pyproj CRS 19 | can be used to create GeoArrow types. 20 | """ 21 | 22 | @classmethod 23 | def from_json(cls, crs_json: str) -> "Crs": 24 | """Create an instance from a PROJJSON string.""" 25 | raise NotImplementedError() 26 | 27 | @classmethod 28 | def from_json_dict(cls, crs_dict: Mapping) -> "Crs": 29 | """Create an instance from the dictionary representation of a parsed 30 | PROJJSON string. 
31 | """ 32 | raise NotImplementedError() 33 | 34 | def to_json(self) -> str: 35 | """Returns the PROJJSON representation of this coordinate reference 36 | system. 37 | """ 38 | raise NotImplementedError() 39 | 40 | def to_json_dict(self) -> Mapping: 41 | """Returns the parsed PROJJSON representation of this coordinate reference 42 | system.""" 43 | raise NotImplementedError() 44 | 45 | 46 | class ProjJsonCrs(Crs): 47 | """Concrete Crs implementation wrapping a previously-generated 48 | PROJJSON string or dictionary. 49 | 50 | Parameters 51 | ---------- 52 | obj : dict or str or bytes 53 | The PROJJSON representation as a string, dictionary representation 54 | of the parsed string, or UTF-8 bytes. 55 | 56 | 57 | Examples 58 | -------- 59 | >>> from geoarrow.types import crs 60 | >>> crs.ProjJsonCrs('{"key": "value"}') 61 | ProjJsonCrs({"key": "value"}) 62 | """ 63 | 64 | @classmethod 65 | def from_json(cls, crs_json: str) -> "Crs": 66 | return ProjJsonCrs(crs_json) 67 | 68 | @classmethod 69 | def from_json_dict(cls, crs_dict: Mapping) -> "Crs": 70 | return ProjJsonCrs(crs_dict) 71 | 72 | def __init__(self, obj: Union[Crs, Mapping, str, bytes]) -> None: 73 | if isinstance(obj, dict): 74 | self._obj = obj 75 | self._str = None 76 | elif isinstance(obj, str): 77 | self._obj = None 78 | self._str = obj 79 | elif isinstance(obj, bytes): 80 | self._obj = None 81 | self._str = obj.decode() 82 | elif hasattr(obj, "to_json"): 83 | self._obj = None 84 | self._str = obj.to_json() 85 | else: 86 | raise TypeError( 87 | "ProjJsonCrs can only be created from Crs, dict, str, or bytes" 88 | ) 89 | 90 | def __eq__(self, value): 91 | # Some duplication with _crs_equal(), but using that here 92 | # involves some recursion that's difficult to avoid 93 | if isinstance(value, UnspecifiedCrs): 94 | return False 95 | elif hasattr(value, "to_json_dict"): 96 | return self.to_json_dict() == value.to_json_dict() 97 | else: 98 | return False 99 | 100 | def to_json(self) -> str: 101 | if 
self._str is None: 102 | self._str = json.dumps(self._obj) 103 | 104 | return self._str 105 | 106 | def to_json_dict(self) -> Mapping: 107 | if self._obj is None: 108 | self._obj = json.loads(self._str) 109 | 110 | return deepcopy(self._obj) 111 | 112 | def to_wkt(self) -> str: 113 | # This could in theory be written to not use pyproj; however, the 114 | # main purpose of this method is to enable pyproj.CRS(self) so it 115 | # may not matter. 116 | import pyproj 117 | 118 | return pyproj.CRS(self.to_json_dict()).to_wkt() 119 | 120 | def __repr__(self) -> str: 121 | try: 122 | crs_dict = self.to_json_dict() 123 | if "id" in crs_dict: 124 | crs_id = crs_dict["id"] 125 | if "authority" in crs_id and "code" in crs_id: 126 | return f"ProjJsonCrs({crs_id['authority']}:{crs_id['code']})" 127 | 128 | except ValueError: 129 | pass 130 | 131 | return f"ProjJsonCrs({self.to_json()[:80]})" 132 | 133 | 134 | class StringCrs(Crs): 135 | def __init__(self, crs: Union[str, bytes]): 136 | if isinstance(crs, str): 137 | self._crs = crs 138 | elif isinstance(crs, bytes): 139 | self._crs = crs.decode() 140 | else: 141 | self._crs = str(crs) 142 | 143 | def __geoarrow_crs_json_values__(self): 144 | # Try to avoid escaping valid JSON into a JSON string 145 | try: 146 | return {"crs": json.loads(self._crs)} 147 | except ValueError: 148 | return {"crs": self._crs} 149 | 150 | def __eq__(self, value): 151 | if isinstance(value, UnspecifiedCrs): 152 | return False 153 | elif isinstance(value, StringCrs) and self._crs == value._crs: 154 | return True 155 | elif hasattr(value, "to_json_dict"): 156 | return self.to_json_dict() == value.to_json_dict() 157 | else: 158 | return False 159 | 160 | @classmethod 161 | def from_json(cls, crs_json: str) -> "StringCrs": 162 | return StringCrs(crs_json) 163 | 164 | @classmethod 165 | def from_json_dict(cls, crs_dict: Mapping) -> "Crs": 166 | return StringCrs(json.dumps(crs_dict)) 167 | 168 | def to_json(self) -> str: 169 | out = 
self._try_parse_json_object() 170 | if out: 171 | return self._crs 172 | 173 | # Fall back on pyproj 174 | import pyproj 175 | 176 | return pyproj.CRS(self._crs).to_json() 177 | 178 | def to_json_dict(self) -> Mapping: 179 | return json.loads(self.to_json()) 180 | 181 | def to_wkt(self) -> str: 182 | import pyproj 183 | 184 | crs_repr = self.__geoarrow_crs_json_values__()["crs"] 185 | return pyproj.CRS(crs_repr).to_wkt() 186 | 187 | def __repr__(self) -> str: 188 | crs_repr = self.__geoarrow_crs_json_values__()["crs"] 189 | return f"StringCrs({crs_repr})" 190 | 191 | def _try_parse_json_object(self) -> Optional[dict]: 192 | try: 193 | obj = json.loads(self._crs) 194 | if isinstance(obj, dict): 195 | return obj 196 | except ValueError: 197 | return None 198 | 199 | 200 | _CRS_LONLAT_DICT = { 201 | "$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", 202 | "type": "GeographicCRS", 203 | "name": "WGS 84 (CRS84)", 204 | "datum_ensemble": { 205 | "name": "World Geodetic System 1984 ensemble", 206 | "members": [ 207 | { 208 | "name": "World Geodetic System 1984 (Transit)", 209 | "id": {"authority": "EPSG", "code": 1166}, 210 | }, 211 | { 212 | "name": "World Geodetic System 1984 (G730)", 213 | "id": {"authority": "EPSG", "code": 1152}, 214 | }, 215 | { 216 | "name": "World Geodetic System 1984 (G873)", 217 | "id": {"authority": "EPSG", "code": 1153}, 218 | }, 219 | { 220 | "name": "World Geodetic System 1984 (G1150)", 221 | "id": {"authority": "EPSG", "code": 1154}, 222 | }, 223 | { 224 | "name": "World Geodetic System 1984 (G1674)", 225 | "id": {"authority": "EPSG", "code": 1155}, 226 | }, 227 | { 228 | "name": "World Geodetic System 1984 (G1762)", 229 | "id": {"authority": "EPSG", "code": 1156}, 230 | }, 231 | { 232 | "name": "World Geodetic System 1984 (G2139)", 233 | "id": {"authority": "EPSG", "code": 1309}, 234 | }, 235 | ], 236 | "ellipsoid": { 237 | "name": "WGS 84", 238 | "semi_major_axis": 6378137, 239 | "inverse_flattening": 298.257223563, 240 | }, 
241 | "accuracy": "2.0", 242 | "id": {"authority": "EPSG", "code": 6326}, 243 | }, 244 | "coordinate_system": { 245 | "subtype": "ellipsoidal", 246 | "axis": [ 247 | { 248 | "name": "Geodetic longitude", 249 | "abbreviation": "Lon", 250 | "direction": "east", 251 | "unit": "degree", 252 | }, 253 | { 254 | "name": "Geodetic latitude", 255 | "abbreviation": "Lat", 256 | "direction": "north", 257 | "unit": "degree", 258 | }, 259 | ], 260 | }, 261 | "scope": "Not known.", 262 | "area": "World.", 263 | "bbox": { 264 | "south_latitude": -90, 265 | "west_longitude": -180, 266 | "north_latitude": 90, 267 | "east_longitude": 180, 268 | }, 269 | "id": {"authority": "OGC", "code": "CRS84"}, 270 | } 271 | 272 | OGC_CRS84 = ProjJsonCrs.from_json_dict(_CRS_LONLAT_DICT) 273 | """Longitude/latitude CRS definition""" 274 | 275 | 276 | class UnspecifiedCrs(Crs): 277 | def __eq__(self, value): 278 | return value is UNSPECIFIED 279 | 280 | 281 | UNSPECIFIED = UnspecifiedCrs() 282 | """Unspecified CRS sentinel 283 | 284 | A :class:`Crs` singleton indicating that a CRS has not been specified. 285 | This is necessary because ``None`` is a valid CRS specification denoting 286 | an explicitly unset CRS. 287 | """ 288 | 289 | 290 | def create(obj) -> Optional[Crs]: 291 | """Create a Crs from an arbitrary Python object 292 | 293 | Applies some heuristics to sanitize an object as a CRS that can be 294 | exported to PROJJSON for use with a GeoArrow type. 295 | 296 | Parameters 297 | ---------- 298 | obj : None, crs-like, string, bytes, or dict 299 | Can be any of: 300 | - ``None``, in which case ``None`` will be returned. This is the 301 | sentinel used to indcate an explicitly unset CRS. 302 | - A crs-like object (i.e., an object with a ``to_json_dict()`` method) 303 | - A string, bytes, or dictionary representation of a PROJJSON crs 304 | (passed to :class:`ProjJsonCrs`). 
305 | 306 | Examples 307 | -------- 308 | >>> from geoarrow.types import crs 309 | >>> crs.create(None) 310 | >>> crs.create(crs.OGC_CRS84) 311 | ProjJsonCrs(OGC:CRS84) 312 | """ 313 | if obj is None: 314 | return None 315 | elif hasattr(obj, "to_json_dict"): 316 | return obj 317 | elif isinstance(obj, dict): 318 | return ProjJsonCrs(obj) 319 | elif isinstance(obj, (str, bytes)): 320 | return StringCrs(obj) 321 | else: 322 | raise ValueError(f"Can't create geoarrow.types.Crs from {obj}") 323 | 324 | 325 | def _coalesce2(value, default): 326 | if value is UNSPECIFIED: 327 | return default 328 | else: 329 | return value 330 | 331 | 332 | def _coalesce_unspecified2(lhs, rhs): 333 | if _crs_equal(lhs, rhs): 334 | return lhs 335 | elif lhs == UNSPECIFIED: 336 | return rhs 337 | elif rhs == UNSPECIFIED: 338 | return lhs 339 | else: 340 | raise ValueError(f"Crs {lhs} and {rhs} are both specified") 341 | 342 | 343 | def _common2(lhs, rhs): 344 | return _coalesce_unspecified2(lhs, rhs) 345 | 346 | 347 | def _crs_equal(lhs, rhs): 348 | if lhs is UNSPECIFIED or rhs is UNSPECIFIED: 349 | return lhs == rhs 350 | elif lhs == rhs: 351 | return True 352 | elif hasattr(lhs, "to_json_dict") and hasattr(rhs, "to_json_dict"): 353 | # This could be more sophisticated; however, CRS equality is 354 | # hard and is currently outside the scope of this module 355 | return lhs.to_json_dict() == rhs.to_json_dict() 356 | else: 357 | return False 358 | -------------------------------------------------------------------------------- /geoarrow-types/tests/test_type_spec.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import json 3 | 4 | import geoarrow.types as gt 5 | from geoarrow.types.constants import ( 6 | Encoding, 7 | GeometryType, 8 | Dimensions, 9 | EdgeType, 10 | CoordType, 11 | ) 12 | from geoarrow.types.type_spec import TypeSpec 13 | from geoarrow.types.crs import ( 14 | OGC_CRS84, 15 | UNSPECIFIED as UNSPECIFIED_CRS, 16 | 
import json

import pytest

import geoarrow.types as gt
from geoarrow.types.constants import (
    CoordType,
    Dimensions,
    EdgeType,
    Encoding,
    GeometryType,
)
from geoarrow.types.crs import (
    OGC_CRS84,
    UNSPECIFIED as UNSPECIFIED_CRS,
    ProjJsonCrs,
    StringCrs,
)
from geoarrow.types.type_spec import TypeSpec


def _fully_specified(
    encoding=Encoding.GEOARROW,
    geometry_type=GeometryType.POINT,
    dimensions=Dimensions.XY,
    coord_type=CoordType.SEPARATED,
    edge_type=EdgeType.PLANAR,
    crs=None,
):
    """A TypeSpec with every component specified; keywords replace components."""
    return TypeSpec(encoding, geometry_type, dimensions, coord_type, edge_type, crs)


def test_type_spec_repr():
    assert repr(TypeSpec()) == "TypeSpec()"
    assert repr(TypeSpec(encoding=Encoding.WKB)) == "TypeSpec(Encoding.WKB)"


def test_type_spec_extension_name():
    # Serialized and native encodings each map to a geoarrow.* extension name
    for spec, expected in [
        (gt.wkb(), "geoarrow.wkb"),
        (gt.large_wkb(), "geoarrow.wkb"),
        (gt.wkt(), "geoarrow.wkt"),
        (gt.large_wkt(), "geoarrow.wkt"),
        (gt.point(), "geoarrow.point"),
        (gt.linestring(), "geoarrow.linestring"),
        (gt.polygon(), "geoarrow.polygon"),
        (gt.multipoint(), "geoarrow.multipoint"),
        (gt.multilinestring(), "geoarrow.multilinestring"),
        (gt.multipolygon(), "geoarrow.multipolygon"),
    ]:
        assert spec.extension_name() == expected

    # Neither a fully unspecified spec nor a bare GEOARROW encoding has a name
    with pytest.raises(ValueError, match="Can't compute extension name for"):
        TypeSpec().extension_name()

    with pytest.raises(ValueError, match="Can't compute extension name for"):
        TypeSpec(encoding=Encoding.GEOARROW).extension_name()


def test_type_spec_extension_metadata():
    assert TypeSpec().with_defaults().extension_metadata() == "{}"

    spherical = TypeSpec(edge_type=EdgeType.SPHERICAL).with_defaults()
    assert spherical.extension_metadata() == '{"edges": "spherical"}'

    vincenty = TypeSpec(edge_type=EdgeType.VINCENTY).with_defaults()
    assert vincenty.extension_metadata() == '{"edges": "vincenty"}'

    with_crs = TypeSpec(crs=gt.OGC_CRS84).with_defaults()
    assert with_crs.extension_metadata().startswith('{"crs": {')

    with pytest.raises(ValueError, match="Can't compute extension_metadata"):
        TypeSpec().extension_metadata()


def test_type_spec_metadata_crs():
    # StringCrs, raw string, raw bytes, and an accidentally JSON-encoded
    # string all serialize to the same plain-string "crs" value
    expected = '{"crs": "EPSG:32620"}'
    for crs_input in (
        StringCrs("EPSG:32620"),
        "EPSG:32620",
        b"EPSG:32620",
        '"EPSG:32620"',
    ):
        spec = TypeSpec(edge_type=EdgeType.PLANAR, crs=crs_input)
        assert spec.extension_metadata() == expected

    # A ProjJsonCrs is embedded as a JSON object with a crs_type marker
    spec = TypeSpec(edge_type=EdgeType.PLANAR, crs=OGC_CRS84)
    parsed = json.loads(spec.extension_metadata())
    assert parsed["crs"] == OGC_CRS84.to_json_dict()
    assert json.loads(spec.extension_metadata())["crs_type"] == "projjson"

    # The unspecified sentinel can't be serialized
    with pytest.raises(ValueError, match="edge_type or crs is unspecified"):
        TypeSpec(crs=UNSPECIFIED_CRS).extension_metadata()


def test_type_spec_metadata_crs_load():
    loaded = TypeSpec.from_extension_metadata('{"crs": "EPSG:32620"}')
    assert isinstance(loaded.crs, StringCrs)

    loaded = TypeSpec.from_extension_metadata('{"crs": {}, "crs_type": "projjson"}')
    assert isinstance(loaded.crs, ProjJsonCrs)
    assert loaded.crs.to_json_dict() == {}


def test_type_spec_metadata_crs_sanitize():
    sanitized = TypeSpec().override(crs="EPSG:32620").crs
    assert isinstance(sanitized, StringCrs)
    assert sanitized._crs == "EPSG:32620"
    # An already-sanitized Crs passes through override() untouched
    assert TypeSpec().override(crs=sanitized).crs is sanitized

    sanitized = TypeSpec().override(crs=ProjJsonCrs({})).crs
    assert isinstance(sanitized, ProjJsonCrs)
    assert TypeSpec().override(crs=sanitized).crs is sanitized


def test_type_spec_metadata_crs_pyproj():
    pyproj = pytest.importorskip("pyproj")
    utm_20n = pyproj.CRS("EPSG:32620")

    spec = TypeSpec(edge_type=EdgeType.PLANAR, crs=utm_20n)
    metadata_obj = json.loads(spec.extension_metadata())
    assert metadata_obj["crs"] == utm_20n.to_json_dict()
    assert metadata_obj["crs_type"] == "projjson"

    # Round trip through metadata; equality must work in both directions
    spec2 = TypeSpec.from_extension_metadata(spec.extension_metadata())
    assert isinstance(spec2.crs, ProjJsonCrs)
    assert pyproj.CRS(spec2.crs) == utm_20n
    assert spec2.crs == utm_20n
    assert utm_20n == spec2.crs

    # pyproj.CRS instances pass through override() untouched
    crs_obj = TypeSpec().override(crs=utm_20n).crs
    assert isinstance(crs_obj, pyproj.CRS)
    assert TypeSpec().override(crs=crs_obj).crs is crs_obj


def test_type_spec_create():
    # An existing TypeSpec is returned as-is
    spec = TypeSpec()
    assert TypeSpec.create(spec) is spec

    # Each component enum (and a Crs) maps onto the corresponding field
    for value, field in [
        (Encoding.WKB, "encoding"),
        (GeometryType.POINT, "geometry_type"),
        (Dimensions.XY, "dimensions"),
        (CoordType.INTERLEAVED, "coord_type"),
        (EdgeType.PLANAR, "edge_type"),
        (OGC_CRS84, "crs"),
    ]:
        assert TypeSpec.create(value) == TypeSpec(**{field: value})

    # Anything else is rejected
    with pytest.raises(
        TypeError, match="Can't create TypeSpec from object of type NoneType"
    ):
        TypeSpec.create(None)


def test_type_spec_coalesce():
    fully_specified = _fully_specified()
    fully_specified2 = _fully_specified(
        geometry_type=GeometryType.LINESTRING,
        dimensions=Dimensions.XYZ,
        coord_type=CoordType.INTERLEAVED,
        edge_type=EdgeType.SPHERICAL,
        crs=OGC_CRS84,
    )

    # Ensure specified always trumps unspecified
    assert TypeSpec.coalesce(fully_specified, TypeSpec()) == fully_specified
    assert TypeSpec.coalesce(TypeSpec(), fully_specified) == fully_specified

    # Ensure that if both are specified, the lefthand side wins
    assert TypeSpec.coalesce(fully_specified, fully_specified2) == fully_specified
    assert TypeSpec.coalesce(fully_specified2, fully_specified) == fully_specified2

    # Ensure that with_defaults()/override() are mapped properly
    assert TypeSpec().with_defaults(fully_specified) == fully_specified
    assert fully_specified.with_defaults(fully_specified2) == fully_specified


def test_type_spec_coalesce_unspecified():
    fully_specified = _fully_specified()

    # Ensure specified always trumps unspecified
    assert TypeSpec.coalesce_unspecified(fully_specified, TypeSpec()) == fully_specified
    assert TypeSpec.coalesce_unspecified(TypeSpec(), fully_specified) == fully_specified

    # Ensure that arguments that are equal can be coalesced here
    assert (
        TypeSpec.coalesce_unspecified(fully_specified, fully_specified)
        == fully_specified
    )

    # Ensure that arguments can't be overspecified
    with pytest.raises(ValueError, match="Encoding is overspecified"):
        TypeSpec.coalesce_unspecified(fully_specified, Encoding.WKB)


def test_type_spec_common():
    fully_specified = _fully_specified()
    fully_specified_z = _fully_specified(dimensions=Dimensions.XYZ)

    # Ensure specified always trumps unspecified
    assert TypeSpec.common(fully_specified, TypeSpec()) == fully_specified
    assert TypeSpec.common(TypeSpec(), fully_specified) == fully_specified

    # Make sure the common output with itself is equal to itself
    assert TypeSpec.common(fully_specified, fully_specified) == fully_specified

    # Ensure that arguments that have a common output are modified (XY -> XYZ)
    assert (
        TypeSpec.common(fully_specified, TypeSpec(dimensions=Dimensions.XYZ))
        == fully_specified_z
    )


def test_type_spec_override():
    base = _fully_specified()

    # Each component can be reset to unspecified by name, leaving the rest alone
    unspecified_by_field = [
        ("encoding", Encoding.UNSPECIFIED),
        ("geometry_type", GeometryType.UNSPECIFIED),
        ("dimensions", Dimensions.UNSPECIFIED),
        ("coord_type", CoordType.UNSPECIFIED),
        ("edge_type", EdgeType.UNSPECIFIED),
    ]
    for position, (field, unspecified) in enumerate(unspecified_by_field):
        expected = TypeSpec(*base[:position], unspecified, *base[position + 1 :])
        assert base.override(**{field: "unspecified"}) == expected

    assert base.override(crs=UNSPECIFIED_CRS) == TypeSpec(*base[:5], UNSPECIFIED_CRS)


def test_type_spec_helper():
    # Check positional arguments inferred
    positional = [
        (Encoding.WKB, TypeSpec(encoding=Encoding.WKB)),
        (GeometryType.POINT, TypeSpec(geometry_type=GeometryType.POINT)),
        (Dimensions.XY, TypeSpec(dimensions=Dimensions.XY)),
        (CoordType.INTERLEAVED, TypeSpec(coord_type=CoordType.INTERLEAVED)),
        (EdgeType.PLANAR, TypeSpec(edge_type=EdgeType.PLANAR)),
        (gt.OGC_CRS84, TypeSpec(crs=gt.OGC_CRS84)),
    ]
    for arg, expected in positional:
        assert gt.type_spec(arg) == expected

    # Check sanitized arguments by name
    named = [
        ("encoding", "wkb", TypeSpec(encoding=Encoding.WKB)),
        ("geometry_type", "point", TypeSpec(geometry_type=GeometryType.POINT)),
        ("dimensions", "xy", TypeSpec(dimensions=Dimensions.XY)),
        ("coord_type", "interleaved", TypeSpec(coord_type=CoordType.INTERLEAVED)),
        ("edge_type", "planar", TypeSpec(edge_type=EdgeType.PLANAR)),
        ("crs", gt.OGC_CRS84, TypeSpec(crs=gt.OGC_CRS84)),
    ]
    for field, value, expected in named:
        assert gt.type_spec(**{field: value}) == expected


def test_type_spec_shortcuts():
    assert gt.wkb() == TypeSpec(encoding=Encoding.WKB)
    assert gt.large_wkb() == TypeSpec(encoding=Encoding.LARGE_WKB)
    assert gt.wkt() == TypeSpec(encoding=Encoding.WKT)
    assert gt.large_wkt() == TypeSpec(encoding=Encoding.LARGE_WKT)
    assert gt.geoarrow() == TypeSpec(encoding=Encoding.GEOARROW)

    # Each native shortcut fixes both the encoding and the geometry type
    native_shortcuts = [
        (gt.box, GeometryType.BOX),
        (gt.point, GeometryType.POINT),
        (gt.linestring, GeometryType.LINESTRING),
        (gt.polygon, GeometryType.POLYGON),
        (gt.multipoint, GeometryType.MULTIPOINT),
        (gt.multilinestring, GeometryType.MULTILINESTRING),
        (gt.multipolygon, GeometryType.MULTIPOLYGON),
    ]
    for shortcut, geometry_type in native_shortcuts:
        assert shortcut() == TypeSpec(
            encoding=Encoding.GEOARROW, geometry_type=geometry_type
        )
For example, we can now read Arrow IPC files written with GeoArrow extension types and the CRS and geometry type is kept:" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 5, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "WkbType(geoarrow.wkb )" 56 | ] 57 | }, 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "import pyarrow as pa\n", 65 | "import urllib.request\n", 66 | "\n", 67 | "url = \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_wkb.arrows\"\n", 68 | "with urllib.request.urlopen(url) as f, pa.ipc.open_stream(f) as reader:\n", 69 | " tab = reader.read_all()\n", 70 | "\n", 71 | "tab.schema.field(\"geometry\").type" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Use `geoarrow.pyarrow.to_geopandas()` to convert to [geopandas](https://geopandas.org):" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 9, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "\n", 90 | "Name: WGS 84\n", 91 | "Axis Info [ellipsoidal]:\n", 92 | "- Lat[north]: Geodetic latitude (degree)\n", 93 | "- Lon[east]: Geodetic longitude (degree)\n", 94 | "Area of Use:\n", 95 | "- name: World.\n", 96 | "- bounds: (-180.0, -90.0, 180.0, 90.0)\n", 97 | "Datum: World Geodetic System 1984 ensemble\n", 98 | "- Ellipsoid: WGS 84\n", 99 | "- Prime Meridian: Greenwich" 100 | ] 101 | }, 102 | "execution_count": 9, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "df = ga.to_geopandas(tab)\n", 109 | "df.geometry.crs" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "...and use `GeoDataFrame.to_arrow()` to get it back:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 11, 122 | 
"metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "ProjJsonCrs(EPSG:4326)" 128 | ] 129 | }, 130 | "execution_count": 11, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "pa.table(df.to_arrow())[\"geometry\"].type.crs" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "These Python bindings also include [GeoParquet](https://geoparquet.org) and [pyogrio](https://github.com/geopandas/pyogrio) integration for direct IO to/from pyarrow. This can be useful when loading data approaching the size of available memory as GeoPandas requires many times more memory for some types of data (notably: large numbers of points)." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 12, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "pyarrow.Table\n", 155 | "name: string\n", 156 | "geometry: extension>\n", 157 | "----\n", 158 | "name: [[\"Vatican City\",\"San Marino\",\"Vaduz\",\"Lobamba\",\"Luxembourg\",...,\"Rio de Janeiro\",\"Sao Paulo\",\"Sydney\",\"Singapore\",\"Hong Kong\"]]\n", 159 | "geometry: [[010100000054E57B4622E828408B074AC09EF34440,0101000000DCB122B42FE228402376B7FCD1F74540,01010000006DAE9AE78808234032D989DC1D914740,01010000007BCB8B0233333F40289B728577773AC0,0101000000C08D39741F8518400F2153E34ACE4840,...,0101000000667B47AA269B45C002B53F5745E836C0,0101000000F15A536A405047C0C1148A19868E37C0,0101000000A286FD30CDE662401F04CF2989EF40C0,01010000003A387DE2A5F659409AF3E7363CB8F43F,0101000000D865F84FB78B5C40144438C1924E3640]]" 160 | ] 161 | }, 162 | "execution_count": 12, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "import geoarrow.pyarrow.io\n", 169 | "\n", 170 | "url = \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities.fgb\"\n", 171 | 
"geoarrow.pyarrow.io.read_pyogrio_table(url)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 13, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/plain": [ 182 | "pyarrow.Table\n", 183 | "name: string\n", 184 | "geometry: extension>\n", 185 | "----\n", 186 | "name: [[\"Vatican City\",\"San Marino\",\"Vaduz\",\"Lobamba\",\"Luxembourg\",...,\"Rio de Janeiro\",\"Sao Paulo\",\"Sydney\",\"Singapore\",\"Hong Kong\"]]\n", 187 | "geometry: [[010100000054E57B4622E828408B074AC09EF34440,0101000000DCB122B42FE228402376B7FCD1F74540,01010000006DAE9AE78808234032D989DC1D914740,01010000007BCB8B0233333F40289B728577773AC0,0101000000C08D39741F8518400F2153E34ACE4840,...,0101000000667B47AA269B45C002B53F5745E836C0,0101000000F15A536A405047C0C1148A19868E37C0,0101000000A286FD30CDE662401F04CF2989EF40C0,01010000003A387DE2A5F659409AF3E7363CB8F43F,0101000000D865F84FB78B5C40144438C1924E3640]]" 188 | ] 189 | }, 190 | "execution_count": 13, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "url = \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet\"\n", 197 | "local_filename, _ = urllib.request.urlretrieve(url)\n", 198 | "\n", 199 | "geoarrow.pyarrow.io.read_geoparquet_table(local_filename)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "Finally, a number of compute functions are provided for common transformations required to create/consume arrays of geometries:" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 19, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "\n", 218 | "[\n", 219 | " [\n", 220 | " \"POINT (12.4533865 41.9032822)\",\n", 221 | " \"POINT (12.4417702 43.9360958)\",\n", 222 | " \"POINT (9.5166695 47.1337238)\",\n", 223 | " \"POINT (31.1999971 -26.4666675)\",\n", 224 | " \"POINT 
(6.1300028 49.6116604)\"\n", 225 | " ]\n", 226 | "]" 227 | ] 228 | }, 229 | "execution_count": 19, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "ga.format_wkt(tab[\"geometry\"])[:5]" 236 | ] 237 | }, 238 | { 239 | "attachments": {}, 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "## Create/Consume GeoArrow Arrays\n", 244 | "\n", 245 | "The `geoarrow-pyarrow` package also provides a number of utilities for working with serialized and GeoArrow-native arrays. For example, you can create geoarrow-encoded `pyarrow.Array`s with `as_geoarrow()`:" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 14, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "GeometryExtensionArray:PointType(geoarrow.point)[1]\n", 257 | "" 258 | ] 259 | }, 260 | "execution_count": 14, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "ga.as_geoarrow([\"POINT (0 1)\"])" 267 | ] 268 | }, 269 | { 270 | "attachments": {}, 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "This will work with:\n", 275 | "\n", 276 | "- An existing array created by geoarrow\n", 277 | "- A `geopandas.GeoSeries`\n", 278 | "- A `pyarrow.Array` or `pyarrow.ChunkedArray` (geoarrow text interpreted as well-known text; binary interpreted as well-known binary)\n", 279 | "- Anything that `pyarrow.array()` will convert to a text or binary array\n", 280 | "\n", 281 | "If there is no common geometry type among elements of the input, `as_geoarrow()` will fall back to well-known binary encoding. 
To explicitly convert to well-known text or binary, use `as_wkt()` or `as_wkb()`.\n", 282 | "\n", 283 | "Alternatively, you can construct GeoArrow arrays directly from a series of buffers as described in the specification:" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 15, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "GeometryExtensionArray:PointType(geoarrow.point)[3]\n", 295 | "\n", 296 | "\n", 297 | "" 298 | ] 299 | }, 300 | "execution_count": 15, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "import numpy as np\n", 307 | "\n", 308 | "ga.point().from_geobuffers(\n", 309 | " None,\n", 310 | " np.array([1.0, 2.0, 3.0]),\n", 311 | " np.array([3.0, 4.0, 5.0])\n", 312 | ")" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 16, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "GeometryExtensionArray:PointType(interleaved geoarrow.point)[3]\n", 324 | "\n", 325 | "\n", 326 | "" 327 | ] 328 | }, 329 | "execution_count": 16, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "ga.point().with_coord_type(ga.CoordType.INTERLEAVED).from_geobuffers(\n", 336 | " None,\n", 337 | " np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])\n", 338 | ")" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "## For Developers\n", 346 | "\n", 347 | "One of the challeneges with GeoArrow data is the large number of permutations between X, Y, Z, M, geometry types, and serialized encodings. The `geoarrow-types` package provides pure Python utilities to manage, compute on, and specify these types (or parts of them, as required)." 
348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 23, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "text/plain": [ 358 | "MultiPointType(geoarrow.multipoint_zm)" 359 | ] 360 | }, 361 | "execution_count": 23, 362 | "metadata": {}, 363 | "output_type": "execute_result" 364 | } 365 | ], 366 | "source": [ 367 | "import geoarrow.types as gt\n", 368 | "\n", 369 | "gt.TypeSpec.common(\n", 370 | " gt.Encoding.GEOARROW,\n", 371 | " gt.GeometryType.POINT,\n", 372 | " gt.GeometryType.MULTIPOINT,\n", 373 | " gt.Dimensions.XYM,\n", 374 | " gt.Dimensions.XYZ,\n", 375 | ").to_pyarrow()" 376 | ] 377 | }, 378 | { 379 | "attachments": {}, 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "## Building\n", 384 | "\n", 385 | "Python bindings for geoarrow are managed with [setuptools](https://setuptools.pypa.io/en/latest/index.html).\n", 386 | "This means you can build the project using:\n", 387 | "\n", 388 | "```shell\n", 389 | "git clone https://github.com/geoarrow/geoarrow-python.git\n", 390 | "pip install -e geoarrow-pyarrow/ geoarrow-types/\n", 391 | "```\n", 392 | "\n", 393 | "Tests use [pytest](https://docs.pytest.org/):\n", 394 | "\n", 395 | "```shell\n", 396 | "pytest\n", 397 | "```" 398 | ] 399 | } 400 | ], 401 | "metadata": { 402 | "kernelspec": { 403 | "display_name": ".venv", 404 | "language": "python", 405 | "name": "python3" 406 | }, 407 | "language_info": { 408 | "codemirror_mode": { 409 | "name": "ipython", 410 | "version": 3 411 | }, 412 | "file_extension": ".py", 413 | "mimetype": "text/x-python", 414 | "name": "python", 415 | "nbconvert_exporter": "python", 416 | "pygments_lexer": "ipython3", 417 | "version": "3.13.3" 418 | }, 419 | "orig_nbformat": 4 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 2 423 | } 424 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/tests/test_io.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | import tempfile 3 | import os 4 | import json 5 | 6 | import pyarrow as pa 7 | from pyarrow import parquet 8 | from geoarrow import types 9 | import geoarrow.pyarrow as ga 10 | from geoarrow.pyarrow import io 11 | 12 | 13 | def test_readpyogrio_table_gpkg(): 14 | pyogrio = pytest.importorskip("pyogrio") 15 | geopandas = pytest.importorskip("geopandas") 16 | 17 | with tempfile.TemporaryDirectory() as tmpdir: 18 | # Check gpkg (which has internal geometry column name) 19 | temp_gpkg = os.path.join(tmpdir, "test.gpkg") 20 | df = geopandas.GeoDataFrame( 21 | geometry=geopandas.GeoSeries.from_wkt(["POINT (0 1)"], crs="OGC:CRS84") 22 | ) 23 | crs_json = df.geometry.crs.to_json() 24 | pyogrio.write_dataframe(df, temp_gpkg) 25 | 26 | table = io.read_pyogrio_table(temp_gpkg) 27 | assert table.column("geom").type == ga.wkb().with_crs(crs_json) 28 | assert ga.format_wkt(table.column("geom")).to_pylist() == ["POINT (0 1)"] 29 | 30 | # Check fgb (which does not have an internal geometry column name) 31 | temp_fgb = os.path.join(tmpdir, "test.fgb") 32 | pyogrio.write_dataframe(df, temp_fgb) 33 | 34 | table = io.read_pyogrio_table(temp_fgb) 35 | assert table.column("geometry").type == ga.wkb().with_crs(crs_json) 36 | assert ga.format_wkt(table.column("geometry")).to_pylist() == ["POINT (0 1)"] 37 | 38 | 39 | def test_write_geoparquet_table_default(): 40 | with tempfile.TemporaryDirectory() as tmpdir: 41 | temp_pq = os.path.join(tmpdir, "test.parquet") 42 | tab = pa.table([ga.as_geoarrow(["POINT (0 1)"])], names=["geometry"]) 43 | 44 | # When geometry_encoding=None, geoarrow types stay geoarrow types 45 | # (probably need to workshop this based on geoparquet_version or something) 46 | io.write_geoparquet_table(tab, temp_pq, geometry_encoding=None) 47 | tab2 = parquet.read_table(temp_pq) 48 | assert b"geo" in tab2.schema.metadata 49 | 
ga.as_wkt(ga.point().wrap_array(tab2["geometry"])).to_pylist() == [ 50 | "POINT (0 1)" 51 | ] 52 | 53 | 54 | def test_write_geoparquet_table_wkb(): 55 | with tempfile.TemporaryDirectory() as tmpdir: 56 | temp_pq = os.path.join(tmpdir, "test.parquet") 57 | tab = pa.table([ga.array(["POINT (0 1)"])], names=["geometry"]) 58 | io.write_geoparquet_table(tab, temp_pq, geometry_encoding="WKB") 59 | tab2 = parquet.read_table(temp_pq) 60 | assert b"geo" in tab2.schema.metadata 61 | meta = json.loads(tab2.schema.metadata[b"geo"]) 62 | assert meta["version"] == "1.0.0" 63 | assert tab2.schema.types[0] == pa.binary() 64 | 65 | 66 | def test_write_geoparquet_table_geoarrow(): 67 | with tempfile.TemporaryDirectory() as tmpdir: 68 | temp_pq = os.path.join(tmpdir, "test.parquet") 69 | tab = pa.table([ga.array(["POINT (0 1)"])], names=["geometry"]) 70 | io.write_geoparquet_table( 71 | tab, temp_pq, geometry_encoding=io.geoparquet_encoding_geoarrow() 72 | ) 73 | tab2 = parquet.read_table(temp_pq) 74 | assert b"geo" in tab2.schema.metadata 75 | meta = json.loads(tab2.schema.metadata[b"geo"]) 76 | assert meta["version"] == "1.1.0" 77 | assert meta["columns"]["geometry"]["encoding"] == "point" 78 | ga.as_wkt(ga.point().wrap_array(tab2["geometry"])).to_pylist() == [ 79 | "POINT (0 1)" 80 | ] 81 | 82 | 83 | def test_read_geoparquet_table_wkb(): 84 | with tempfile.TemporaryDirectory() as tmpdir: 85 | temp_pq = os.path.join(tmpdir, "test.parquet") 86 | 87 | # With "geo" metadata key 88 | tab = pa.table([ga.array(["POINT (0 1)"])], names=["geometry"]) 89 | io.write_geoparquet_table(tab, temp_pq, geometry_encoding="WKB") 90 | tab2 = io.read_geoparquet_table(temp_pq) 91 | assert isinstance(tab2["geometry"].type, ga.GeometryExtensionType) 92 | assert b"geo" not in tab2.schema.metadata 93 | 94 | # Without "geo" metadata key 95 | tab = pa.table([pa.array(["POINT (0 1)"])], names=["geometry"]) 96 | parquet.write_table(tab, temp_pq) 97 | tab2 = io.read_geoparquet_table(temp_pq) 98 | assert 
isinstance(tab2["geometry"].type, ga.GeometryExtensionType) 99 | 100 | 101 | def test_read_geoparquet_table_geoarrow(): 102 | with tempfile.TemporaryDirectory() as tmpdir: 103 | temp_pq = os.path.join(tmpdir, "test.parquet") 104 | 105 | tab = pa.table([ga.array(["POINT (0 1)"])], names=["geometry"]) 106 | io.write_geoparquet_table( 107 | tab, temp_pq, geometry_encoding=io.geoparquet_encoding_geoarrow() 108 | ) 109 | tab2 = io.read_geoparquet_table(temp_pq) 110 | tab2["geometry"].type == ga.point() 111 | 112 | 113 | def test_geoparquet_column_spec_from_type_geom_type(): 114 | spec_wkb = io._geoparquet_column_spec_from_type(ga.wkb()) 115 | assert spec_wkb["geometry_types"] == [] 116 | 117 | spec_point = io._geoparquet_column_spec_from_type(ga.point()) 118 | assert spec_point["geometry_types"] == ["Point"] 119 | 120 | spec_linestring = io._geoparquet_column_spec_from_type(ga.linestring()) 121 | assert spec_linestring["geometry_types"] == ["LineString"] 122 | 123 | spec_polygon = io._geoparquet_column_spec_from_type(ga.polygon()) 124 | assert spec_polygon["geometry_types"] == ["Polygon"] 125 | 126 | spec_multipoint = io._geoparquet_column_spec_from_type(ga.multipoint()) 127 | assert spec_multipoint["geometry_types"] == ["MultiPoint"] 128 | 129 | spec_multilinestring = io._geoparquet_column_spec_from_type(ga.multilinestring()) 130 | assert spec_multilinestring["geometry_types"] == ["MultiLineString"] 131 | 132 | spec_multipolygon = io._geoparquet_column_spec_from_type(ga.multipolygon()) 133 | assert spec_multipolygon["geometry_types"] == ["MultiPolygon"] 134 | 135 | 136 | def test_geoparquet_column_spec_from_type_crs(): 137 | spec_storage = io._geoparquet_column_spec_from_type(pa.binary()) 138 | assert "crs" not in spec_storage 139 | 140 | spec_none = io._geoparquet_column_spec_from_type(ga.wkb()) 141 | assert spec_none["crs"] is None 142 | 143 | spec_projjson = io._geoparquet_column_spec_from_type( 144 | ga.wkb().with_crs(types.OGC_CRS84) 145 | ) 146 | assert 
spec_projjson["crs"]["id"]["code"] == "CRS84" 147 | 148 | 149 | def test_geoparquet_column_spec_from_type_edges(): 150 | spec_planar = io._geoparquet_column_spec_from_type(ga.wkb()) 151 | assert "edges" not in spec_planar 152 | 153 | spec_spherical = io._geoparquet_column_spec_from_type( 154 | ga.wkb().with_edge_type(ga.EdgeType.SPHERICAL) 155 | ) 156 | assert spec_spherical["edges"] == "spherical" 157 | 158 | 159 | def test_geoparquet_guess_primary_geometry_column(): 160 | assert ( 161 | io._geoparquet_guess_primary_geometry_column(pa.schema([]), "explicit_name") 162 | == "explicit_name" 163 | ) 164 | 165 | assert ( 166 | io._geoparquet_guess_primary_geometry_column( 167 | pa.schema([pa.field("geometry", pa.binary())]) 168 | ) 169 | == "geometry" 170 | ) 171 | 172 | assert ( 173 | io._geoparquet_guess_primary_geometry_column( 174 | pa.schema([pa.field("geography", pa.binary())]) 175 | ) 176 | == "geography" 177 | ) 178 | 179 | with pytest.raises(ValueError, match="at least one geometry column"): 180 | io._geoparquet_guess_primary_geometry_column( 181 | pa.schema([pa.field("not_geom", pa.binary())]) 182 | ) 183 | 184 | assert ( 185 | io._geoparquet_guess_primary_geometry_column( 186 | pa.schema([pa.field("first_def_geom", ga.wkb())]) 187 | ) 188 | == "first_def_geom" 189 | ) 190 | 191 | 192 | def test_geoparquet_columns_from_schema(): 193 | schema = pa.schema([pa.field("col_a", ga.wkb()), pa.field("col_b", pa.binary())]) 194 | 195 | # Guessing should just return GeoArrow columns 196 | cols = io._geoparquet_columns_from_schema(schema) 197 | assert list(cols.keys()) == ["col_a"] 198 | assert cols["col_a"] == {"encoding": "WKB", "geometry_types": [], "crs": None} 199 | 200 | # Explicit should just return specified columns 201 | cols_explicit = io._geoparquet_columns_from_schema(schema, ["col_b"]) 202 | assert list(cols_explicit.keys()) == ["col_b"] 203 | assert cols_explicit["col_b"] == {"encoding": "WKB", "geometry_types": []} 204 | 205 | # Guessing should always 
include primary geometry column 206 | cols_primary = io._geoparquet_columns_from_schema( 207 | schema, primary_geometry_column="col_b" 208 | ) 209 | assert list(cols_primary.keys()) == ["col_a", "col_b"] 210 | 211 | 212 | def test_geoparquet_metadata_from_schema(): 213 | schema = pa.schema([pa.field("col_a", ga.wkb()), pa.field("col_b", pa.binary())]) 214 | metadata = io._geoparquet_metadata_from_schema(schema) 215 | assert list(metadata.keys()) == ["version", "primary_column", "columns"] 216 | assert metadata["version"] == "1.0.0" 217 | assert metadata["primary_column"] == "col_a" 218 | assert list(metadata["columns"].keys()) == ["col_a"] 219 | 220 | 221 | def test_geoparquet_metadata_from_schema_geometry_types(): 222 | # GeoArrow encoding with add_geometry_types=False should not add geometry types 223 | schema = pa.schema([pa.field("col_a", ga.point())]) 224 | metadata = io._geoparquet_metadata_from_schema(schema, add_geometry_types=False) 225 | assert metadata["columns"]["col_a"]["geometry_types"] == [] 226 | 227 | # ...with None or True, it should be added 228 | metadata = io._geoparquet_metadata_from_schema(schema, add_geometry_types=None) 229 | assert metadata["columns"]["col_a"]["geometry_types"] == ["Point"] 230 | 231 | metadata = io._geoparquet_metadata_from_schema(schema, add_geometry_types=True) 232 | assert metadata["columns"]["col_a"]["geometry_types"] == ["Point"] 233 | 234 | # For WKB type, all values of add_geometry_types should not add geometry types 235 | schema = pa.schema([pa.field("col_a", ga.wkb())]) 236 | metadata = io._geoparquet_metadata_from_schema(schema, add_geometry_types=False) 237 | assert metadata["columns"]["col_a"]["geometry_types"] == [] 238 | 239 | metadata = io._geoparquet_metadata_from_schema(schema, add_geometry_types=None) 240 | assert metadata["columns"]["col_a"]["geometry_types"] == [] 241 | 242 | metadata = io._geoparquet_metadata_from_schema(schema, add_geometry_types=True) 243 | assert 
metadata["columns"]["col_a"]["geometry_types"] == []


def test_guess_geometry_columns():
    """Columns named 'geometry' are guessed as WKB or WKT from storage type."""
    assert io._geoparquet_guess_geometry_columns(pa.schema([])) == {}

    binary_schema = pa.schema([pa.field("geometry", pa.binary())])
    from_binary = io._geoparquet_guess_geometry_columns(binary_schema)
    assert list(from_binary.keys()) == ["geometry"]
    assert from_binary["geometry"] == {"encoding": "WKB"}

    utf8_schema = pa.schema([pa.field("geometry", pa.utf8())])
    from_utf8 = io._geoparquet_guess_geometry_columns(utf8_schema)
    assert list(from_utf8.keys()) == ["geometry"]
    assert from_utf8["geometry"] == {"encoding": "WKT"}


def test_guess_geography_columns():
    """A column named 'geography' is additionally guessed as spherical-edged."""
    assert io._geoparquet_guess_geometry_columns(pa.schema([])) == {}

    geography_schema = pa.schema([pa.field("geography", pa.binary())])
    guessed = io._geoparquet_guess_geometry_columns(geography_schema)
    assert list(guessed.keys()) == ["geography"]
    assert guessed["geography"] == {"encoding": "WKB", "edges": "spherical"}


def test_encode_chunked_array():
    """Error paths and the pass-through path of _geoparquet_encode_chunked_array."""
    single_point = ga.array(["POINT (0 1)"])
    with pytest.raises(ValueError, match="Expected column encoding to be one of"):
        io._geoparquet_encode_chunked_array(single_point, {"encoding": "NotAnEncoding"})

    mixed_types = ga.array(["POINT (0 1)", "LINESTRING (0 0, 1 1)"])
    with pytest.raises(ValueError, match="Can't encode column with"):
        io._geoparquet_encode_chunked_array(
            mixed_types,
            {"encoding": io.geoparquet_encoding_geoarrow()},
        )

    with pytest.raises(ValueError, match="Can't encode column with encoding"):
        io._geoparquet_encode_chunked_array(
            ga.as_geoarrow(["POINT (0 1)"]),
            {"encoding": "linestring"},
        )

    # When the input already matches the requested geoarrow encoding the
    # storage should be handed back untouched.
    already_point = ga.as_geoarrow(["POINT (0 1)"])
    encoded = io._geoparquet_encode_chunked_array(
        already_point, spec={"encoding": "point"}
    )
    assert encoded
== already_point.storage 296 | 297 | # Check geoarrow encoding when some inference and encoding has to happen 298 | spec = {"encoding": io.geoparquet_encoding_geoarrow()} 299 | encoded = io._geoparquet_encode_chunked_array(ga.as_wkb(["POINT (0 1)"]), spec=spec) 300 | assert encoded == already_point.storage 301 | assert spec["encoding"] == "point" 302 | 303 | spec = {"encoding": "WKB"} 304 | encoded = io._geoparquet_encode_chunked_array(ga.as_wkb(["POINT (0 1)"]), spec) 305 | assert encoded.type == pa.binary() 306 | assert spec == {"encoding": "WKB"} 307 | 308 | spec = {"encoding": "WKB"} 309 | encoded = io._geoparquet_encode_chunked_array( 310 | ga.array(["POINT (0 -1)", "POINT Z (1 2 3)"]), 311 | spec, 312 | add_geometry_types=True, 313 | add_bbox=True, 314 | ) 315 | assert encoded.type == pa.binary() 316 | assert spec["bbox"] == [0, -1, 1, 2] 317 | assert spec["geometry_types"] == ["Point", "Point Z"] 318 | 319 | 320 | def test_chunked_array_to_geoarrow_encodings(): 321 | item_already_geoarrow = pa.chunked_array([ga.array(["POINT (0 1)"])]) 322 | assert ( 323 | io._geoparquet_chunked_array_to_geoarrow(item_already_geoarrow, {}) 324 | is item_already_geoarrow 325 | ) 326 | 327 | with pytest.raises(ValueError, match="missing 'encoding'"): 328 | io._geoparquet_chunked_array_to_geoarrow(pa.array([]), {}) 329 | 330 | with pytest.raises(ValueError, match="Invalid GeoParquet encoding"): 331 | io._geoparquet_chunked_array_to_geoarrow( 332 | pa.array([]), {"encoding": "NotAnEncoding"} 333 | ) 334 | 335 | item_binary = pa.chunked_array([ga.as_wkb(["POINT (0 1)"]).storage]) 336 | item_geoarrow = io._geoparquet_chunked_array_to_geoarrow( 337 | item_binary, {"encoding": "WKB", "crs": None} 338 | ) 339 | assert item_geoarrow.type == ga.wkb() 340 | 341 | item_wkt = pa.chunked_array([ga.as_wkt(["POINT (0 1)"]).storage]) 342 | item_geoarrow = io._geoparquet_chunked_array_to_geoarrow( 343 | item_wkt, {"encoding": "WKT", "crs": None} 344 | ) 345 | assert item_geoarrow.type == 
ga.wkt() 346 | 347 | 348 | def test_chunked_array_to_geoarrow_crs(): 349 | item_binary = pa.chunked_array([ga.as_wkb(["POINT (0 1)"]).storage]) 350 | 351 | item_missing_crs = io._geoparquet_chunked_array_to_geoarrow( 352 | item_binary, {"encoding": "WKB"} 353 | ) 354 | assert item_missing_crs.type.crs.to_json_dict() == types.OGC_CRS84.to_json_dict() 355 | 356 | item_explicit_crs = io._geoparquet_chunked_array_to_geoarrow( 357 | item_binary, {"encoding": "WKB", "crs": {}} 358 | ) 359 | assert item_explicit_crs.type.crs.to_json_dict() == {} 360 | 361 | 362 | def test_chunked_array_to_geoarrow_edges(): 363 | item_binary = pa.chunked_array([ga.as_wkb(["POINT (0 1)"]).storage]) 364 | 365 | item_planar_default = io._geoparquet_chunked_array_to_geoarrow( 366 | item_binary, {"encoding": "WKB"} 367 | ) 368 | assert item_planar_default.type.edge_type == ga.EdgeType.PLANAR 369 | 370 | item_planar_explicit = io._geoparquet_chunked_array_to_geoarrow( 371 | item_binary, {"encoding": "WKB", "edges": "planar"} 372 | ) 373 | assert item_planar_explicit.type.edge_type == ga.EdgeType.PLANAR 374 | 375 | item_spherical = io._geoparquet_chunked_array_to_geoarrow( 376 | item_binary, {"encoding": "WKB", "edges": "spherical"} 377 | ) 378 | assert item_spherical.type.edge_type == ga.EdgeType.SPHERICAL 379 | 380 | with pytest.raises(ValueError, match="Invalid GeoParquet column edges value"): 381 | io._geoparquet_chunked_array_to_geoarrow( 382 | item_binary, {"encoding": "WKB", "edges": "invalid_edges_value"} 383 | ) 384 | 385 | 386 | def test_table_to_geoarrow(): 387 | tab = pa.table([pa.array([], pa.binary())], names=["col_name"]) 388 | tab_geo = io._geoparquet_table_to_geoarrow(tab, {"col_name": {"encoding": "WKB"}}) 389 | assert "col_name" in tab_geo.schema.names 390 | assert isinstance(tab_geo["col_name"].type, ga.GeometryExtensionType) 391 | assert tab_geo["col_name"].type.crs.to_json_dict() == types.OGC_CRS84.to_json_dict() 392 | 393 | # Check with no columns selected 394 | tab_no_cols 
= tab.drop_columns(["col_name"]) 395 | tab_no_cols_geo = io._geoparquet_table_to_geoarrow( 396 | tab_no_cols, {"col_name": {"encoding": "WKB"}} 397 | ) 398 | assert tab_no_cols_geo == tab_no_cols 399 | -------------------------------------------------------------------------------- /geoarrow-pyarrow/tests/test_pyarrow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | from math import inf 4 | 5 | import pyarrow as pa 6 | import numpy as np 7 | import pytest 8 | 9 | import geoarrow.c.lib as lib 10 | from geoarrow import types 11 | import geoarrow.pyarrow as ga 12 | import geoarrow.pyarrow._type as _type 13 | import geoarrow.pyarrow._array as _array 14 | 15 | 16 | def test_version(): 17 | assert re.match(r"^[0-9]+\.[0-9]+", ga.__version__) 18 | 19 | 20 | def test_geometry_type_basic(): 21 | pa_type = _type.point() 22 | 23 | assert pa_type.geometry_type == ga.GeometryType.POINT 24 | assert pa_type.dimensions == ga.Dimensions.XY 25 | assert pa_type.coord_type == ga.CoordType.SEPARATED 26 | 27 | expected_storage = pa.struct( 28 | [ 29 | pa.field("x", pa.float64(), nullable=False), 30 | pa.field("y", pa.float64(), nullable=False), 31 | ] 32 | ) 33 | assert pa_type.storage_type == expected_storage 34 | 35 | 36 | def test_geometry_type_with(): 37 | type_obj = _type.point() 38 | 39 | type_linestring = type_obj.with_geometry_type(ga.GeometryType.LINESTRING) 40 | assert type_linestring.geometry_type == ga.GeometryType.LINESTRING 41 | 42 | type_xyz = type_obj.with_dimensions(ga.Dimensions.XYZ) 43 | assert type_xyz.dimensions == ga.Dimensions.XYZ 44 | 45 | type_interleaved = type_obj.with_coord_type(ga.CoordType.INTERLEAVED) 46 | assert type_interleaved.coord_type == ga.CoordType.INTERLEAVED 47 | 48 | type_spherical = type_obj.with_edge_type(ga.EdgeType.SPHERICAL) 49 | assert type_spherical.edge_type == ga.EdgeType.SPHERICAL 50 | 51 | type_crs = type_obj.with_crs(types.OGC_CRS84) 52 | assert type_crs.crs == 
types.OGC_CRS84 53 | 54 | type_crs = type_obj.with_crs("OGC:CRS84") 55 | assert repr(type_crs.crs) == "StringCrs(OGC:CRS84)" 56 | 57 | 58 | def test_type_with_crs_pyproj(): 59 | pyproj = pytest.importorskip("pyproj") 60 | type_obj = ga.wkb() 61 | 62 | type_crs = type_obj.with_crs(pyproj.CRS("EPSG:32620")) 63 | assert isinstance(type_crs.crs, pyproj.CRS) 64 | crs_dict = type_crs.crs.to_json_dict() 65 | assert crs_dict["id"]["code"] == 32620 66 | 67 | 68 | def test_constructors(): 69 | assert ga.wkb().extension_name == "geoarrow.wkb" 70 | assert ga.large_wkb().extension_name == "geoarrow.wkb" 71 | assert ga.wkb_view().extension_name == "geoarrow.wkb" 72 | assert ga.wkt().extension_name == "geoarrow.wkt" 73 | assert ga.large_wkt().extension_name == "geoarrow.wkt" 74 | assert ga.wkt_view().extension_name == "geoarrow.wkt" 75 | assert ga.point().extension_name == "geoarrow.point" 76 | assert ga.linestring().extension_name == "geoarrow.linestring" 77 | assert ga.polygon().extension_name == "geoarrow.polygon" 78 | assert ga.multipoint().extension_name == "geoarrow.multipoint" 79 | assert ga.multilinestring().extension_name == "geoarrow.multilinestring" 80 | assert ga.multipolygon().extension_name == "geoarrow.multipolygon" 81 | 82 | generic = ga.extension_type( 83 | types.type_spec( 84 | ga.Encoding.GEOARROW, 85 | ga.GeometryType.POINT, 86 | ga.Dimensions.XYZ, 87 | ga.CoordType.INTERLEAVED, 88 | ga.EdgeType.SPHERICAL, 89 | crs=types.OGC_CRS84, 90 | ) 91 | ) 92 | assert generic.geometry_type == ga.GeometryType.POINT 93 | assert generic.dimensions == ga.Dimensions.XYZ 94 | assert generic.coord_type == ga.CoordType.INTERLEAVED 95 | assert generic.edge_type == ga.EdgeType.SPHERICAL 96 | assert generic.crs == types.OGC_CRS84 97 | 98 | 99 | def test_type_common(): 100 | assert ga.geometry_type_common([]) == ga.wkb() 101 | assert ga.geometry_type_common([ga.wkt()]) == ga.wkt() 102 | assert ga.geometry_type_common([ga.point(), ga.point()]) == ga.point() 103 | assert 
ga.geometry_type_common([ga.point(), ga.linestring()]) == ga.wkb() 104 | 105 | 106 | def test_array(): 107 | array = ga.array(["POINT (30 10)"]) 108 | assert array.type == ga.wkt() 109 | assert isinstance(array[0], ga._scalar.WktScalar) 110 | 111 | wkb_item = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3e\x40\x00\x00\x00\x00\x00\x00\x24\x40" 112 | array = ga.array([wkb_item]) 113 | assert array.type == ga.wkb() 114 | assert isinstance(array[0], ga._scalar.WkbScalar) 115 | 116 | with pytest.raises(TypeError): 117 | ga.array([1]) 118 | 119 | array = ga.array(["POINT (30 10)"], ga.wkt()) 120 | assert array.type == ga.wkt() 121 | assert array.type.storage_type == pa.utf8() 122 | 123 | array = ga.array(["POINT (30 10)"], ga.large_wkt()) 124 | assert array.type == ga.large_wkt() 125 | assert array.type.storage_type == pa.large_utf8() 126 | 127 | array = ga.array([wkb_item], ga.wkb()) 128 | assert array.type == ga.wkb() 129 | assert array.type.storage_type == pa.binary() 130 | 131 | array = ga.array([wkb_item], ga.large_wkb()) 132 | assert array.type == ga.large_wkb() 133 | assert array.type.storage_type == pa.large_binary() 134 | 135 | 136 | def test_array_view_types(): 137 | # This one requires pyarrow >= 18, because that's when the necessary 138 | # cast() was added. 
139 | try: 140 | pa.array(["foofy"]).cast(pa.string_view()) 141 | except pa.lib.ArrowNotImplementedError: 142 | pytest.skip("ga.array() with view types requires pyarrow >= 18.0.0") 143 | 144 | array = ga.array(["POINT (30 10)"], ga.wkt_view()) 145 | assert array.type == ga.wkt_view() 146 | assert array.type.storage_type == pa.string_view() 147 | 148 | wkb_item = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3e\x40\x00\x00\x00\x00\x00\x00\x24\x40" 149 | array = ga.array([wkb_item], ga.wkb_view()) 150 | assert array.type == ga.wkb_view() 151 | assert array.type.storage_type == pa.binary_view() 152 | 153 | schema_capsule, array_capsule = array.__arrow_c_array__() 154 | rearr = pa.Array._import_from_c_capsule(schema_capsule, array_capsule) 155 | rearr.validate(full=True) 156 | 157 | 158 | def test_array_repr(): 159 | array = ga.array(["POINT (30 10)"]) 160 | array_repr = repr(array) 161 | assert array_repr.startswith("GeometryExtensionArray") 162 | assert "" in array_repr 163 | 164 | array = ga.array(["POINT (30 10)"] * 12) 165 | array_repr = repr(array) 166 | assert "...2 values..." 
in array_repr 167 | 168 | array = ga.array( 169 | ["LINESTRING (100000 100000, 100000 100000, 100000 100000, 100000 100000)"] 170 | ) 171 | array_repr = repr(array) 172 | assert "...>" in array_repr 173 | 174 | array = ga.array(["THIS IS TOTALLY INVALID WKT"]) 175 | array_repr = repr(array) 176 | assert array_repr.startswith("GeometryExtensionArray") 177 | assert "* 1 or more display values failed to parse" in array_repr 178 | 179 | 180 | def test_scalar_wkt(): 181 | array = ga.array(["POINT (0 1)"]) 182 | assert array[0].wkt == "POINT (0 1)" 183 | assert array[0].wkb == ga.as_wkb(array).storage[0].as_py() 184 | assert repr(array[0]).startswith("WktScalar") 185 | 186 | 187 | def test_scalar_wkb(): 188 | array = ga.as_wkb(["POINT (0 1)"]) 189 | assert array[0].wkt == "POINT (0 1)" 190 | assert array[0].wkb == ga.as_wkb(array).storage[0].as_py() 191 | assert repr(array[0]).startswith("WkbScalar") 192 | 193 | 194 | def test_scalar_geoarrow(): 195 | array = ga.as_geoarrow(["POINT (0 1)"]) 196 | assert array[0].wkt == "POINT (0 1)" 197 | assert array[0].wkb == ga.as_wkb(array).storage[0].as_py() 198 | assert repr(array[0]).startswith("GeometryExtensionScalar") 199 | 200 | 201 | def test_scalar_box(): 202 | # The box kernel doesn't yet implement non XY boxes 203 | array = ga.box(["LINESTRING ZM (0 1 2 3, 4 5 6 7)"]) 204 | assert array[0].xmin == 0 205 | assert array[0].ymin == 1 206 | assert array[0].zmin is None 207 | assert array[0].mmin is None 208 | assert array[0].xmax == 4 209 | assert array[0].ymax == 5 210 | assert array[0].zmax is None 211 | assert array[0].mmax is None 212 | assert repr(array[0]).startswith("BoxScalar") 213 | 214 | 215 | def test_scalar_repr(): 216 | array = ga.array( 217 | ["LINESTRING (100000 100000, 100000 100000, 100000 100000, 100000 100000)"] 218 | ) 219 | assert repr(array[0]).endswith("...>") 220 | 221 | array = ga.array(["TOTALLY INVALID WKT"]) 222 | assert "value failed to parse" in repr(array[0]) 223 | 224 | 225 | def 
test_kernel_void(): 226 | with pytest.raises(TypeError): 227 | kernel = ga.Kernel.void(pa.int32()) 228 | kernel.push(5) 229 | 230 | array = ga.array(["POINT (30 10)"]) 231 | kernel = ga.Kernel.void(array.type) 232 | out = kernel.push(array) 233 | assert out.type == pa.null() 234 | assert len(out) == 1 235 | 236 | array = ga.array(["POINT (30 10)", "POINT (31 11)"]) 237 | kernel = ga.Kernel.void_agg(array.type) 238 | assert kernel.push(array) is None 239 | out = kernel.finish() 240 | assert out.type == pa.null() 241 | assert len(out) == 1 242 | 243 | 244 | def test_kernel_as(): 245 | array = ga.array(["POINT (30 10)"], ga.wkt().with_crs(types.OGC_CRS84)) 246 | kernel = ga.Kernel.as_wkt(array.type) 247 | out = kernel.push(array) 248 | assert out.type.extension_name == "geoarrow.wkt" 249 | assert out.type.crs.to_json_dict() == types.OGC_CRS84.to_json_dict() 250 | assert isinstance(out, _array.GeometryExtensionArray) 251 | 252 | array = ga.array(["POINT (30 10)"], ga.wkt().with_crs(types.OGC_CRS84)) 253 | kernel = ga.Kernel.as_wkb(array.type) 254 | out = kernel.push(array) 255 | assert out.type.extension_name == "geoarrow.wkb" 256 | assert out.type.crs.to_json_dict() == types.OGC_CRS84.to_json_dict() 257 | assert isinstance(out, _array.GeometryExtensionArray) 258 | 259 | if sys.byteorder == "little": 260 | wkb_item = b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3e\x40\x00\x00\x00\x00\x00\x00\x24\x40" 261 | assert out[0].as_py() == wkb_item 262 | 263 | array = ga.array(["POINT (30 10)"], ga.wkt().with_crs(types.OGC_CRS84)) 264 | kernel = ga.Kernel.as_geoarrow(array.type, 1) 265 | out = kernel.push(array) 266 | assert out.type.extension_name == "geoarrow.point" 267 | assert out.type.crs.to_json_dict() == types.OGC_CRS84.to_json_dict() 268 | assert isinstance(out, _array.GeometryExtensionArray) 269 | 270 | 271 | def test_kernel_format(): 272 | array = ga.array(["POINT (30.12345 10.12345)"]) 273 | kernel = ga.Kernel.format_wkt(array.type, precision=3, 
max_element_size_bytes=15) 274 | 275 | out = kernel.push(array) 276 | assert out.type == pa.string() 277 | assert out[0].as_py() == "POINT (30.123 1" 278 | 279 | 280 | def test_kernel_unique_geometry_types(): 281 | array = ga.array(["POINT (0 1)", "POINT (30 10)", "LINESTRING Z (0 1 2, 3 4 5)"]) 282 | kernel = ga.Kernel.unique_geometry_types_agg(array.type) 283 | kernel.push(array) 284 | out = kernel.finish() 285 | 286 | assert out.type == pa.int32() 287 | out_py = [item.as_py() for item in out] 288 | assert out_py == [1, 1002] 289 | 290 | 291 | def test_kernel_box(): 292 | array = ga.array(["POINT (0 1)", "POINT (30 10)", "LINESTRING EMPTY"]) 293 | kernel = ga.Kernel.box(array.type) 294 | out = kernel.push(array) 295 | 296 | assert out[0].as_py() == {"xmin": 0, "xmax": 0, "ymin": 1, "ymax": 1} 297 | assert out[1].as_py() == {"xmin": 30, "xmax": 30, "ymin": 10, "ymax": 10} 298 | assert out[2].as_py() == {"xmin": inf, "xmax": -inf, "ymin": inf, "ymax": -inf} 299 | 300 | 301 | def test_kernel_box_agg(): 302 | array = ga.array(["POINT (0 1)", "POINT (30 10)", "LINESTRING EMPTY"]) 303 | kernel = ga.Kernel.box_agg(array.type) 304 | kernel.push(array) 305 | out = kernel.finish() 306 | 307 | assert out[0].as_py() == {"xmin": 0, "xmax": 30, "ymin": 1, "ymax": 10} 308 | 309 | 310 | def test_kernel_visit_void(): 311 | array = ga.array(["POINT (30 10)"], ga.wkt()) 312 | kernel = ga.Kernel.visit_void_agg(array.type) 313 | assert kernel.push(array) is None 314 | out = kernel.finish() 315 | assert out.type == pa.null() 316 | assert len(out) == 1 317 | 318 | array = ga.array(["POINT (30 10)", "NOT VALID WKT AT ALL"], ga.wkt()) 319 | kernel = ga.Kernel.visit_void_agg(array.type) 320 | with pytest.raises(lib.GeoArrowCException): 321 | kernel.push(array) 322 | out = kernel.finish() 323 | assert out.type == pa.null() 324 | assert len(out) == 1 325 | 326 | 327 | def test_array_geobuffers(): 328 | arr = ga.as_geoarrow(["POLYGON ((0 0, 1 0, 0 1, 0 0))"]) 329 | bufs = arr.geobuffers() 
330 | assert bufs[0] is None 331 | np.testing.assert_array_equal(bufs[1], np.array([0, 1])) 332 | np.testing.assert_array_equal(bufs[2], np.array([0, 4])) 333 | np.testing.assert_array_equal(bufs[3], np.array([0.0, 1.0, 0.0, 0.0])) 334 | np.testing.assert_array_equal(bufs[4], np.array([0.0, 0.0, 1.0, 0.0])) 335 | 336 | 337 | def test_point_array_from_geobuffers(): 338 | arr = ga.point().from_geobuffers( 339 | b"\xff", 340 | np.array([1.0, 2.0, 3.0]), 341 | np.array([4.0, 5.0, 6.0]), 342 | ) 343 | assert len(arr) == 3 344 | assert ga.as_wkt(arr)[2].as_py() == "POINT (3 6)" 345 | 346 | arr = ( 347 | ga.point() 348 | .with_coord_type(ga.CoordType.INTERLEAVED) 349 | .from_geobuffers(None, np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])) 350 | ) 351 | assert len(arr) == 3 352 | assert ga.as_wkt(arr)[2].as_py() == "POINT (5 6)" 353 | 354 | 355 | def test_linestring_array_from_geobuffers(): 356 | arr = ga.linestring().from_geobuffers( 357 | None, 358 | np.array([0, 3], dtype=np.int32), 359 | np.array([1.0, 2.0, 3.0]), 360 | np.array([4.0, 5.0, 6.0]), 361 | ) 362 | assert len(arr) == 1 363 | assert ga.as_wkt(arr)[0].as_py() == "LINESTRING (1 4, 2 5, 3 6)" 364 | 365 | 366 | def test_polygon_array_from_geobuffers(): 367 | arr = ga.polygon().from_geobuffers( 368 | None, 369 | np.array([0, 1], dtype=np.int32), 370 | np.array([0, 4], dtype=np.int32), 371 | np.array([1.0, 2.0, 3.0, 1.0]), 372 | np.array([4.0, 5.0, 6.0, 4.0]), 373 | ) 374 | assert len(arr) == 1 375 | assert ga.as_wkt(arr)[0].as_py() == "POLYGON ((1 4, 2 5, 3 6, 1 4))" 376 | 377 | 378 | def test_multipoint_array_from_geobuffers(): 379 | arr = ga.multipoint().from_geobuffers( 380 | None, 381 | np.array([0, 3], dtype=np.int32), 382 | np.array([1.0, 2.0, 3.0]), 383 | np.array([4.0, 5.0, 6.0]), 384 | ) 385 | assert len(arr) == 1 386 | assert ga.as_wkt(arr)[0].as_py() == "MULTIPOINT (1 4, 2 5, 3 6)" 387 | 388 | 389 | def test_multilinestring_array_from_geobuffers(): 390 | arr = ga.multilinestring().from_geobuffers( 391 | 
None, 392 | np.array([0, 1], dtype=np.int32), 393 | np.array([0, 4], dtype=np.int32), 394 | np.array([1.0, 2.0, 3.0, 1.0]), 395 | np.array([4.0, 5.0, 6.0, 4.0]), 396 | ) 397 | assert len(arr) == 1 398 | assert ga.as_wkt(arr)[0].as_py() == "MULTILINESTRING ((1 4, 2 5, 3 6, 1 4))" 399 | 400 | 401 | def test_multipolygon_array_from_geobuffers(): 402 | arr = ga.multipolygon().from_geobuffers( 403 | None, 404 | np.array([0, 1], dtype=np.int32), 405 | np.array([0, 1], dtype=np.int32), 406 | np.array([0, 4], dtype=np.int32), 407 | np.array([1.0, 2.0, 3.0, 1.0]), 408 | np.array([4.0, 5.0, 6.0, 4.0]), 409 | ) 410 | assert len(arr) == 1 411 | assert ga.as_wkt(arr)[0].as_py() == "MULTIPOLYGON (((1 4, 2 5, 3 6, 1 4)))" 412 | 413 | 414 | def test_box_array_from_geobuffers(): 415 | arr = ( 416 | types.box() 417 | .to_pyarrow() 418 | .from_geobuffers( 419 | b"\xff", 420 | np.array([1.0, 2.0, 3.0]), 421 | np.array([4.0, 5.0, 6.0]), 422 | np.array([7.0, 8.0, 9.0]), 423 | np.array([10.0, 11.0, 12.0]), 424 | ) 425 | ) 426 | assert len(arr) == 3 427 | assert arr[2].bounds == {"xmin": 3.0, "ymin": 6.0, "xmax": 9.0, "ymax": 12.0} 428 | assert "BoxArray" in repr(arr) 429 | assert "'xmin': 3.0" in repr(arr) 430 | 431 | 432 | # Easier to test here because we have actual geoarrow arrays to parse 433 | def test_c_array_view(): 434 | arr = ga.as_geoarrow(["POLYGON ((0 0, 1 0, 0 1, 0 0))"]) 435 | 436 | cschema = lib.SchemaHolder() 437 | arr.type._export_to_c(cschema._addr()) 438 | carray = lib.ArrayHolder() 439 | arr._export_to_c(carray._addr()) 440 | 441 | array_view = lib.CArrayView(carray, cschema) 442 | buffers = array_view.buffers() 443 | assert len(buffers) == 5 444 | 445 | buffer_arrays = [np.array(b) for b in buffers] 446 | 447 | assert buffers[0] is None 448 | 449 | assert buffer_arrays[1].shape == (2,) 450 | assert buffer_arrays[1][0] == 0 451 | assert buffer_arrays[1][1] == 1 452 | 453 | assert buffer_arrays[2].shape == (2,) 454 | assert buffer_arrays[2][0] == 0 455 | assert 
buffer_arrays[2][1] == 4 456 | 457 | assert buffer_arrays[3].shape == (4,) 458 | assert buffer_arrays[3][1] == 1 459 | assert buffer_arrays[3][3] == 0 460 | 461 | assert buffer_arrays[4].shape == (4,) 462 | assert buffer_arrays[4][1] == 0 463 | assert buffer_arrays[4][3] == 0 464 | 465 | 466 | def test_c_array_view_interleaved(): 467 | arr = ga.array(["POLYGON ((0 0, 1 0, 0 1, 0 0))"]) 468 | arr = ga.as_geoarrow(arr, ga.polygon().with_coord_type(ga.CoordType.INTERLEAVED)) 469 | 470 | cschema = lib.SchemaHolder() 471 | arr.type._export_to_c(cschema._addr()) 472 | carray = lib.ArrayHolder() 473 | arr._export_to_c(carray._addr()) 474 | 475 | array_view = lib.CArrayView(carray, cschema) 476 | buffers = array_view.buffers() 477 | assert len(buffers) == 4 478 | 479 | buffer_arrays = [np.array(b) for b in buffers] 480 | 481 | assert buffers[0] is None 482 | 483 | assert buffer_arrays[1].shape == (2,) 484 | assert buffer_arrays[1][0] == 0 485 | assert buffer_arrays[1][1] == 1 486 | 487 | assert buffer_arrays[2].shape == (2,) 488 | assert buffer_arrays[2][0] == 0 489 | assert buffer_arrays[2][1] == 4 490 | 491 | assert buffer_arrays[3].shape == (8,) 492 | assert buffer_arrays[3][0] == 0 493 | assert buffer_arrays[3][1] == 0 494 | assert buffer_arrays[3][2] == 1 495 | assert buffer_arrays[3][3] == 0 496 | assert buffer_arrays[3][6] == 0 497 | assert buffer_arrays[3][7] == 0 498 | -------------------------------------------------------------------------------- /geoarrow-types/tests/test_type_pyarrow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pyarrow as pa 3 | import pytest 4 | 5 | import geoarrow.types as gt 6 | from geoarrow.types import type_pyarrow 7 | 8 | 9 | def test_wrap_array_non_exact(): 10 | pa_version_tuple = tuple(int(component) for component in pa.__version__.split(".")) 11 | if pa_version_tuple < (14,): 12 | pytest.skip("wrap_array with non-exact type requires pyarrow >= 14") 13 | 14 | 
from pyarrow import compute as pc 15 | 16 | storage = pc.make_struct( 17 | pa.array([1.0, 2.0, 3.0]), pa.array([3.0, 4.0, 5.0]), field_names=["x", "y"] 18 | ) 19 | point = gt.point().to_pyarrow() 20 | point_ext = point.wrap_array(storage) 21 | assert point_ext.type.storage_type.field(0).nullable is False 22 | 23 | storage_chunked = pa.chunked_array([storage, storage]) 24 | point_chunked_ext = point.wrap_array(storage_chunked) 25 | assert point_chunked_ext.type.storage_type.field(0).nullable is False 26 | assert point_chunked_ext.num_chunks == 2 27 | 28 | 29 | def test_classes_serialized(): 30 | wkt = gt.wkt().to_pyarrow() 31 | assert isinstance(wkt, type_pyarrow.WktType) 32 | assert wkt.encoding == gt.Encoding.WKT 33 | assert wkt.geometry_type == gt.GeometryType.GEOMETRY 34 | assert wkt.dimensions == gt.Dimensions.UNKNOWN 35 | assert wkt.coord_type == gt.CoordType.UNSPECIFIED 36 | 37 | wkb = gt.wkb().to_pyarrow() 38 | assert isinstance(wkb, type_pyarrow.WkbType) 39 | assert wkb.encoding == gt.Encoding.WKB 40 | assert wkb.geometry_type == gt.GeometryType.GEOMETRY 41 | assert wkb.dimensions == gt.Dimensions.UNKNOWN 42 | assert wkb.coord_type == gt.CoordType.UNSPECIFIED 43 | 44 | 45 | def test_geometry_types(): 46 | xy = pa.struct( 47 | [ 48 | pa.field("x", pa.float64(), nullable=False), 49 | pa.field("y", pa.float64(), nullable=False), 50 | ] 51 | ) 52 | 53 | point = gt.point().to_pyarrow() 54 | assert isinstance(point, type_pyarrow.PointType) 55 | assert point.encoding == gt.Encoding.GEOARROW 56 | assert point.geometry_type == gt.GeometryType.POINT 57 | assert point.storage_type == xy 58 | 59 | linestring = gt.linestring().to_pyarrow() 60 | assert isinstance(linestring, type_pyarrow.LinestringType) 61 | assert linestring.encoding == gt.Encoding.GEOARROW 62 | assert linestring.geometry_type == gt.GeometryType.LINESTRING 63 | assert linestring.storage_type == pa.list_(pa.field("vertices", xy, False)) 64 | 65 | polygon = gt.polygon().to_pyarrow() 66 | assert 
isinstance(polygon, type_pyarrow.PolygonType) 67 | assert polygon.encoding == gt.Encoding.GEOARROW 68 | assert polygon.geometry_type == gt.GeometryType.POLYGON 69 | assert polygon.storage_type == pa.list_( 70 | pa.field("rings", pa.list_(pa.field("vertices", xy, False)), False) 71 | ) 72 | 73 | multipoint = gt.multipoint().to_pyarrow() 74 | assert isinstance(multipoint, type_pyarrow.MultiPointType) 75 | assert multipoint.encoding == gt.Encoding.GEOARROW 76 | assert multipoint.geometry_type == gt.GeometryType.MULTIPOINT 77 | assert multipoint.storage_type == pa.list_(pa.field("points", xy, False)) 78 | 79 | multilinestring = gt.multilinestring().to_pyarrow() 80 | assert isinstance(multilinestring, type_pyarrow.MultiLinestringType) 81 | assert multilinestring.encoding == gt.Encoding.GEOARROW 82 | assert multilinestring.geometry_type == gt.GeometryType.MULTILINESTRING 83 | assert polygon.storage_type == pa.list_( 84 | pa.field("linestrings", pa.list_(pa.field("vertices", xy, False)), False) 85 | ) 86 | 87 | multipolygon = gt.multipolygon().to_pyarrow() 88 | assert isinstance(multipolygon, type_pyarrow.MultiPolygonType) 89 | assert multipolygon.encoding == gt.Encoding.GEOARROW 90 | assert multipolygon.geometry_type == gt.GeometryType.MULTIPOLYGON 91 | assert multipolygon.storage_type == pa.list_( 92 | pa.field( 93 | "polygons", 94 | pa.list_( 95 | pa.field("rings", pa.list_(pa.field("vertices", xy, False)), False) 96 | ), 97 | False, 98 | ) 99 | ) 100 | 101 | with pytest.raises(ValueError, match="Can't compute extension name"): 102 | gt.type_spec(gt.GeometryType.GEOMETRYCOLLECTION).to_pyarrow() 103 | 104 | 105 | def test_interleaved_dimensions(): 106 | point_xy = gt.point(dimensions="xy", coord_type="interleaved").to_pyarrow() 107 | assert point_xy.coord_type == gt.CoordType.INTERLEAVED 108 | assert point_xy.dimensions == gt.Dimensions.XY 109 | assert point_xy.storage_type.field(0).name == "xy" 110 | 111 | point_xyz = gt.point(dimensions="xyz", 
coord_type="interleaved").to_pyarrow() 112 | assert point_xyz.coord_type == gt.CoordType.INTERLEAVED 113 | assert point_xyz.dimensions == gt.Dimensions.XYZ 114 | assert point_xyz.storage_type.field(0).name == "xyz" 115 | 116 | point_xym = gt.point(dimensions="xym", coord_type="interleaved").to_pyarrow() 117 | assert point_xym.coord_type == gt.CoordType.INTERLEAVED 118 | assert point_xym.dimensions == gt.Dimensions.XYM 119 | assert point_xym.storage_type.field(0).name == "xym" 120 | 121 | point_xyzm = gt.point(dimensions="xyzm", coord_type="interleaved").to_pyarrow() 122 | assert point_xyzm.coord_type == gt.CoordType.INTERLEAVED 123 | assert point_xyzm.dimensions == gt.Dimensions.XYZM 124 | assert point_xyzm.storage_type.field(0).name == "xyzm" 125 | 126 | 127 | def test_separated_dimensions(): 128 | point_xy = gt.point(dimensions="xy", coord_type="separated").to_pyarrow() 129 | assert point_xy.coord_type == gt.CoordType.SEPARATED 130 | assert point_xy.dimensions == gt.Dimensions.XY 131 | storage_names = [ 132 | point_xy.storage_type.field(i).name 133 | for i in range(point_xy.storage_type.num_fields) 134 | ] 135 | assert storage_names == ["x", "y"] 136 | 137 | point_xyz = gt.point(dimensions="xyz", coord_type="separated").to_pyarrow() 138 | assert point_xy.coord_type == gt.CoordType.SEPARATED 139 | assert point_xyz.dimensions == gt.Dimensions.XYZ 140 | storage_names = [ 141 | point_xyz.storage_type.field(i).name 142 | for i in range(point_xyz.storage_type.num_fields) 143 | ] 144 | assert storage_names == ["x", "y", "z"] 145 | 146 | point_xym = gt.point(dimensions="xym", coord_type="separated").to_pyarrow() 147 | assert point_xy.coord_type == gt.CoordType.SEPARATED 148 | assert point_xym.dimensions == gt.Dimensions.XYM 149 | storage_names = [ 150 | point_xym.storage_type.field(i).name 151 | for i in range(point_xym.storage_type.num_fields) 152 | ] 153 | assert storage_names == ["x", "y", "m"] 154 | 155 | point_xyzm = gt.point(dimensions="xyzm", 
coord_type="separated").to_pyarrow() 156 | assert point_xy.coord_type == gt.CoordType.SEPARATED 157 | assert point_xyzm.dimensions == gt.Dimensions.XYZM 158 | storage_names = [ 159 | point_xyzm.storage_type.field(i).name 160 | for i in range(point_xyzm.storage_type.num_fields) 161 | ] 162 | assert storage_names == ["x", "y", "z", "m"] 163 | 164 | 165 | def test_deserialize_infer_encoding(): 166 | extension_type = type_pyarrow._deserialize_storage(pa.utf8()) 167 | assert extension_type.encoding == gt.Encoding.WKT 168 | 169 | extension_type = type_pyarrow._deserialize_storage(pa.large_utf8()) 170 | assert extension_type.encoding == gt.Encoding.LARGE_WKT 171 | 172 | extension_type = type_pyarrow._deserialize_storage(pa.binary()) 173 | assert extension_type.encoding == gt.Encoding.WKB 174 | 175 | extension_type = type_pyarrow._deserialize_storage(pa.large_binary()) 176 | assert extension_type.encoding == gt.Encoding.LARGE_WKB 177 | 178 | # Should fail if given if given a non-sensical type 179 | with pytest.raises(ValueError, match="Can't guess encoding from type nesting"): 180 | type_pyarrow._deserialize_storage(pa.float64()) 181 | 182 | # ...and slightly differently if it uses a type that is never 183 | # used in any geoarrow storage 184 | with pytest.raises( 185 | ValueError, match="Type int8 is not a valid GeoArrow type component" 186 | ): 187 | type_pyarrow._deserialize_storage(pa.int8()) 188 | 189 | 190 | def test_deserialize_infer_geometry_type(): 191 | # We can infer the required information for points and multipolygons 192 | # based purely on the level of nesting. 
193 | point = pa.struct({"x": pa.float64(), "y": pa.float64()}) 194 | multipolygon = pa.list_(pa.list_(pa.list_(point))) 195 | interleaved_point = pa.list_(pa.float64(), list_size=2) 196 | interleaved_multipolygon = pa.list_(pa.list_(pa.list_(interleaved_point))) 197 | 198 | extension_type = type_pyarrow._deserialize_storage(point) 199 | assert extension_type.encoding == gt.Encoding.GEOARROW 200 | assert extension_type.geometry_type == gt.GeometryType.POINT 201 | assert extension_type.coord_type == gt.CoordType.SEPARATED 202 | 203 | extension_type = type_pyarrow._deserialize_storage(multipolygon) 204 | assert extension_type.encoding == gt.Encoding.GEOARROW 205 | assert extension_type.geometry_type == gt.GeometryType.MULTIPOLYGON 206 | assert extension_type.coord_type == gt.CoordType.SEPARATED 207 | 208 | extension_type = type_pyarrow._deserialize_storage(interleaved_point) 209 | assert extension_type.encoding == gt.Encoding.GEOARROW 210 | assert extension_type.geometry_type == gt.GeometryType.POINT 211 | assert extension_type.coord_type == gt.CoordType.INTERLEAVED 212 | 213 | extension_type = type_pyarrow._deserialize_storage(interleaved_multipolygon) 214 | assert extension_type.encoding == gt.Encoding.GEOARROW 215 | assert extension_type.geometry_type == gt.GeometryType.MULTIPOLYGON 216 | assert extension_type.coord_type == gt.CoordType.INTERLEAVED 217 | 218 | # extension name would be required for other levels of nesting 219 | with pytest.raises(ValueError, match="Can't compute extension name"): 220 | type_pyarrow._deserialize_storage(pa.list_(point)) 221 | 222 | # If we manually specify the wrong extension name, this should error 223 | with pytest.raises(ValueError, match="GeometryType is overspecified"): 224 | type_pyarrow._deserialize_storage(point, "geoarrow.linestring") 225 | 226 | 227 | def test_deserialize_infer_dimensions_separated(): 228 | extension_type = type_pyarrow._deserialize_storage( 229 | pa.struct({d: pa.float64() for d in "xy"}) 230 | ) 231 | 
assert extension_type.dimensions == gt.Dimensions.XY 232 | 233 | extension_type = type_pyarrow._deserialize_storage( 234 | pa.struct({d: pa.float64() for d in "xyz"}) 235 | ) 236 | assert extension_type.dimensions == gt.Dimensions.XYZ 237 | 238 | extension_type = type_pyarrow._deserialize_storage( 239 | pa.struct({d: pa.float64() for d in "xym"}) 240 | ) 241 | assert extension_type.dimensions == gt.Dimensions.XYM 242 | 243 | extension_type = type_pyarrow._deserialize_storage( 244 | pa.struct({d: pa.float64() for d in "xyzm"}) 245 | ) 246 | assert extension_type.dimensions == gt.Dimensions.XYZM 247 | 248 | # Struct coordinates should never have dimensions inferred from number of children 249 | with pytest.raises( 250 | ValueError, match="Can't infer dimensions from coord field names" 251 | ): 252 | type_pyarrow._deserialize_storage(pa.struct({d: pa.float64() for d in "ab"})) 253 | 254 | 255 | def test_deserialize_infer_dimensions_interleaved(): 256 | extension_type = type_pyarrow._deserialize_storage( 257 | pa.list_(pa.float64(), list_size=2) 258 | ) 259 | assert extension_type.dimensions == gt.Dimensions.XY 260 | 261 | extension_type = type_pyarrow._deserialize_storage( 262 | pa.list_(pa.float64(), list_size=4) 263 | ) 264 | assert extension_type.dimensions == gt.Dimensions.XYZM 265 | 266 | # Fixed-size list should never have dimensions inferred where this would be 267 | # ambiguous. 
268 | with pytest.raises( 269 | ValueError, match="Can't infer dimensions from coord field names" 270 | ): 271 | type_pyarrow._deserialize_storage(pa.list_(pa.float64(), list_size=3)) 272 | 273 | # ...but this should be able to be specified using the field name 274 | extension_type = type_pyarrow._deserialize_storage( 275 | pa.list_(pa.field("xyz", pa.float64()), list_size=3) 276 | ) 277 | assert extension_type.dimensions == gt.Dimensions.XYZ 278 | 279 | extension_type = type_pyarrow._deserialize_storage( 280 | pa.list_(pa.field("xym", pa.float64()), list_size=3) 281 | ) 282 | assert extension_type.dimensions == gt.Dimensions.XYM 283 | 284 | # If the number of inferred dimensions does not match the number of actual dimensions, 285 | # this should error 286 | with pytest.raises(ValueError, match="Expected 4 dimensions but got"): 287 | type_pyarrow._deserialize_storage( 288 | pa.list_(pa.field("xyz", pa.float64()), list_size=4) 289 | ) 290 | 291 | 292 | def test_geometry_union_type(): 293 | geometry = gt.type_spec(gt.Encoding.GEOARROW, gt.GeometryType.GEOMETRY).to_pyarrow() 294 | assert isinstance(geometry, type_pyarrow.GeometryUnionType) 295 | assert geometry.encoding == gt.Encoding.GEOARROW 296 | assert geometry.geometry_type == gt.GeometryType.GEOMETRY 297 | 298 | 299 | def test_geometry_collection_union_type(): 300 | geometry = gt.type_spec( 301 | gt.Encoding.GEOARROW, gt.GeometryType.GEOMETRYCOLLECTION 302 | ).to_pyarrow() 303 | assert isinstance(geometry, type_pyarrow.GeometryCollectionUnionType) 304 | assert geometry.encoding == gt.Encoding.GEOARROW 305 | assert geometry.geometry_type == gt.GeometryType.GEOMETRYCOLLECTION 306 | 307 | 308 | def test_box_array_from_geobuffers(): 309 | pa_type = gt.box(dimensions=gt.Dimensions.XY).to_pyarrow() 310 | arr = pa_type.from_geobuffers( 311 | b"\xff", 312 | np.array([1.0, 2.0, 3.0]), 313 | np.array([4.0, 5.0, 6.0]), 314 | np.array([7.0, 8.0, 9.0]), 315 | np.array([10.0, 11.0, 12.0]), 316 | ) 317 | assert len(arr) == 3 
318 | assert arr.type == pa_type 319 | assert arr.storage == pa.array( 320 | [ 321 | {"xmin": 1.0, "ymin": 4.0, "xmax": 7.0, "ymax": 10.0}, 322 | {"xmin": 2.0, "ymin": 5.0, "xmax": 8.0, "ymax": 11.0}, 323 | {"xmin": 3.0, "ymin": 6.0, "xmax": 9.0, "ymax": 12.0}, 324 | ], 325 | pa_type.storage_type, 326 | ) 327 | 328 | 329 | def test_point_array_from_geobuffers(): 330 | pa_type = gt.point(dimensions=gt.Dimensions.XYZM).to_pyarrow() 331 | arr = pa_type.from_geobuffers( 332 | b"\xff", 333 | np.array([1.0, 2.0, 3.0]), 334 | np.array([4.0, 5.0, 6.0]), 335 | np.array([7.0, 8.0, 9.0]), 336 | np.array([10.0, 11.0, 12.0]), 337 | ) 338 | assert len(arr) == 3 339 | assert arr.type == pa_type 340 | assert arr.storage == pa.array( 341 | [ 342 | {"x": 1.0, "y": 4.0, "z": 7.0, "m": 10.0}, 343 | {"x": 2.0, "y": 5.0, "z": 8.0, "m": 11.0}, 344 | {"x": 3.0, "y": 6.0, "z": 9.0, "m": 12.0}, 345 | ], 346 | pa_type.storage_type, 347 | ) 348 | 349 | pa_type = gt.point(coord_type=gt.CoordType.INTERLEAVED).to_pyarrow() 350 | arr = pa_type.from_geobuffers(None, np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])) 351 | assert len(arr) == 3 352 | assert arr.storage == pa.array( 353 | [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], pa_type.storage_type 354 | ) 355 | 356 | 357 | @pytest.mark.parametrize( 358 | "pa_type", [gt.linestring().to_pyarrow(), gt.multipoint().to_pyarrow()] 359 | ) 360 | def test_linestringish_array_from_geobuffers(pa_type): 361 | arr = pa_type.from_geobuffers( 362 | b"\xff", 363 | np.array([0, 4], np.int32), 364 | np.array([0.0, 1.0, 0.0, 0.0]), 365 | np.array([0.0, 0.0, 1.0, 0.0]), 366 | ) 367 | assert len(arr) == 1 368 | assert arr.storage == pa.array( 369 | [ 370 | [ 371 | {"x": 0.0, "y": 0.0}, 372 | {"x": 1.0, "y": 0.0}, 373 | {"x": 0.0, "y": 1.0}, 374 | {"x": 0.0, "y": 0.0}, 375 | ] 376 | ], 377 | pa_type.storage_type, 378 | ) 379 | 380 | 381 | @pytest.mark.parametrize( 382 | "pa_type", [gt.polygon().to_pyarrow(), gt.multilinestring().to_pyarrow()] 383 | ) 384 | def 
test_polygonish_array_from_geobuffers(pa_type): 385 | arr = pa_type.from_geobuffers( 386 | b"\xff", 387 | np.array([0, 1], np.int32), 388 | np.array([0, 4], np.int32), 389 | np.array([0.0, 1.0, 0.0, 0.0]), 390 | np.array([0.0, 0.0, 1.0, 0.0]), 391 | ) 392 | assert len(arr) == 1 393 | assert arr.storage == pa.array( 394 | [ 395 | [ 396 | [ 397 | {"x": 0.0, "y": 0.0}, 398 | {"x": 1.0, "y": 0.0}, 399 | {"x": 0.0, "y": 1.0}, 400 | {"x": 0.0, "y": 0.0}, 401 | ] 402 | ] 403 | ], 404 | pa_type.storage_type, 405 | ) 406 | 407 | 408 | def test_multipolygon_array_from_geobuffers(): 409 | pa_type = gt.multipolygon().to_pyarrow() 410 | arr = pa_type.from_geobuffers( 411 | b"\xff", 412 | np.array([0, 1], np.int32), 413 | np.array([0, 1], np.int32), 414 | np.array([0, 4], np.int32), 415 | np.array([0.0, 1.0, 0.0, 0.0]), 416 | np.array([0.0, 0.0, 1.0, 0.0]), 417 | ) 418 | assert len(arr) == 1 419 | assert arr.storage == pa.array( 420 | [ 421 | [ 422 | [ 423 | [ 424 | {"x": 0.0, "y": 0.0}, 425 | {"x": 1.0, "y": 0.0}, 426 | {"x": 0.0, "y": 1.0}, 427 | {"x": 0.0, "y": 0.0}, 428 | ] 429 | ] 430 | ] 431 | ], 432 | pa_type.storage_type, 433 | ) 434 | 435 | 436 | @pytest.mark.parametrize( 437 | "spec", 438 | [ 439 | # Serialized types 440 | gt.wkt(), 441 | gt.large_wkt(), 442 | gt.wkb(), 443 | gt.large_wkb(), 444 | gt.wkt_view(), 445 | gt.wkb_view(), 446 | # Geometry types 447 | gt.box(), 448 | gt.point(), 449 | gt.linestring(), 450 | gt.polygon(), 451 | gt.multipoint(), 452 | gt.multilinestring(), 453 | gt.multipolygon(), 454 | # All dimensions, separated coords 455 | gt.point(dimensions="xy", coord_type="separated"), 456 | gt.point(dimensions="xyz", coord_type="separated"), 457 | gt.point(dimensions="xym", coord_type="separated"), 458 | gt.point(dimensions="xyzm", coord_type="separated"), 459 | # All dimensions, interleaved coords 460 | gt.point(dimensions="xy", coord_type="interleaved"), 461 | gt.point(dimensions="xyz", coord_type="interleaved"), 462 | gt.point(dimensions="xym", 
coord_type="interleaved"), 463 | gt.point(dimensions="xyzm", coord_type="interleaved"), 464 | # Box with all dimensions 465 | gt.box(dimensions="xy"), 466 | gt.box(dimensions="xyz"), 467 | gt.box(dimensions="xym"), 468 | gt.box(dimensions="xyzm"), 469 | # Union types 470 | gt.type_spec(gt.Encoding.GEOARROW, gt.GeometryType.GEOMETRY), 471 | gt.type_spec(gt.Encoding.GEOARROW, gt.GeometryType.GEOMETRYCOLLECTION), 472 | ], 473 | ) 474 | def test_roundtrip_extension_type(spec): 475 | if not hasattr(pa, "binary_view") and spec.encoding in ( 476 | gt.Encoding.WKB_VIEW, 477 | gt.Encoding.WKT_VIEW, 478 | ): 479 | pytest.skip("binary_view/string_view requires pyarrow >= 14") 480 | 481 | extension_type = type_pyarrow.extension_type(spec) 482 | serialized = extension_type.__arrow_ext_serialize__() 483 | extension_type2 = type_pyarrow._deserialize_storage( 484 | extension_type.storage_type, extension_type._extension_name, serialized 485 | ) 486 | assert extension_type2 == extension_type 487 | 488 | 489 | def test_register_extension_type(): 490 | pa_version_tuple = tuple(int(component) for component in pa.__version__.split(".")) 491 | if pa_version_tuple < (14,): 492 | pytest.skip("Can't test extension type registration pyarrow < 14") 493 | 494 | with type_pyarrow.registered_extension_types(): 495 | schema_capsule = gt.point().to_pyarrow().__arrow_c_schema__() 496 | pa_type = pa.DataType._import_from_c_capsule(schema_capsule) 497 | assert isinstance(pa_type, type_pyarrow.GeometryExtensionType) 498 | 499 | with type_pyarrow.unregistered_extension_types(): 500 | schema_capsule = gt.point().to_pyarrow().__arrow_c_schema__() 501 | pa_type = pa.DataType._import_from_c_capsule(schema_capsule) 502 | assert not isinstance(pa_type, type_pyarrow.GeometryExtensionType) 503 | --------------------------------------------------------------------------------