├── pyogrio ├── _io.pxd ├── tests │ ├── __init__.py │ ├── fixtures │ │ ├── naturalearth_lowres │ │ │ ├── naturalearth_lowres.cpg │ │ │ ├── naturalearth_lowres.dbf │ │ │ ├── naturalearth_lowres.shp │ │ │ ├── naturalearth_lowres.shx │ │ │ └── naturalearth_lowres.prj │ │ ├── curve.gpkg │ │ ├── line_zm.gpkg │ │ ├── sample.osm.pbf │ │ ├── curvepolygon.gpkg │ │ ├── multisurface.gpkg │ │ ├── test_gpkg_nulls.gpkg │ │ ├── list_field_values_file.parquet │ │ ├── list_nested_struct_file.parquet │ │ └── README.md │ ├── test_util.py │ └── test_path.py ├── _geometry.pxd ├── _vsi.pxd ├── _err.pxd ├── errors.py ├── __init__.py ├── _compat.py ├── _env.py ├── arrow_bridge.h ├── _geometry.pyx ├── util.py ├── _vsi.pyx ├── _ogr.pyx ├── _err.pyx └── core.py ├── .gitattributes ├── ci ├── envs │ ├── minimal.yml │ ├── latest.yml │ └── nightly-deps.yml ├── vcpkg-custom-ports │ └── zlib │ │ ├── usage │ │ ├── vcpkg.json │ │ ├── 0002-skip-building-examples.patch │ │ ├── vcpkg-cmake-wrapper.cmake │ │ ├── LICENSE │ │ └── portfile.cmake ├── custom-triplets │ ├── x64-windows-dynamic-release.cmake │ ├── x64-linux-dynamic-release.cmake │ ├── arm64-linux-dynamic-release.cmake │ ├── x64-osx-dynamic-release.cmake │ └── arm64-osx-dynamic-release.cmake ├── requirements-wheel-test.txt ├── vcpkg-manylinux2014.json ├── vcpkg.json ├── manylinux_2_28_x86_64-vcpkg-gdal.Dockerfile ├── manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile └── manylinux2014_x86_64-vcpkg-gdal.Dockerfile ├── MANIFEST.in ├── setup.cfg ├── .github ├── dependabot.yml └── workflows │ ├── lint.yml │ ├── tests-conda.yml │ ├── docker-gdal.yml │ └── release.yml ├── docs ├── environment.yml ├── source │ ├── api.rst │ ├── errors.md │ ├── index.md │ ├── concepts.md │ ├── supported_formats.md │ ├── about.md │ ├── conf.py │ ├── known_issues.md │ └── install.md ├── Makefile └── make.bat ├── readthedocs.yml ├── .pre-commit-config.yaml ├── environment-dev.yml ├── .gitignore ├── LICENSE ├── benchmarks ├── README.md ├── conftest.py ├── test_core_benchmarks.py 
├── test_raw_io_benchmarks.py └── test_io_benchmarks_geopandas.py ├── README.md ├── SECURITY.md ├── pyproject.toml ├── setup.py └── CHANGES.md /pyogrio/_io.pxd: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyogrio/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | pyogrio/_version.py export-subst 2 | -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg: -------------------------------------------------------------------------------- 1 | ISO-8859-1 -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/curve.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/curve.gpkg -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/line_zm.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/line_zm.gpkg -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/sample.osm.pbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/sample.osm.pbf -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/curvepolygon.gpkg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/curvepolygon.gpkg -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/multisurface.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/multisurface.gpkg -------------------------------------------------------------------------------- /ci/envs/minimal.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy 6 | - libgdal-core 7 | - pytest 8 | -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/test_gpkg_nulls.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/list_field_values_file.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/list_field_values_file.parquet -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/list_nested_struct_file.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/list_nested_struct_file.parquet -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyogrio/_version.py 2 | include pyogrio/*.pyx pyogrio/*.pxd 3 | include pyogrio/arrow_bridge.h 4 | exclude pyogrio/*.c 5 | recursive-include 
pyogrio/tests/fixtures * -------------------------------------------------------------------------------- /pyogrio/_geometry.pxd: -------------------------------------------------------------------------------- 1 | from pyogrio._ogr cimport * 2 | 3 | cdef str get_geometry_type(void *ogr_layer) 4 | cdef OGRwkbGeometryType get_geometry_type_code(str geometry_type) except * 5 | -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx -------------------------------------------------------------------------------- /ci/vcpkg-custom-ports/zlib/usage: -------------------------------------------------------------------------------- 1 | The package zlib is compatible with built-in CMake targets: 2 | 3 | find_package(ZLIB REQUIRED) 4 | target_link_libraries(main PRIVATE ZLIB::ZLIB) 5 | -------------------------------------------------------------------------------- /ci/custom-triplets/x64-windows-dynamic-release.cmake: 
-------------------------------------------------------------------------------- 1 | set(VCPKG_TARGET_ARCHITECTURE x64) 2 | set(VCPKG_CRT_LINKAGE dynamic) 3 | set(VCPKG_LIBRARY_LINKAGE dynamic) 4 | set(VCPKG_BUILD_TYPE release) 5 | -------------------------------------------------------------------------------- /ci/vcpkg-custom-ports/zlib/vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zlib", 3 | "version": "1.2.5.2", 4 | "port-version": 0, 5 | "description": "A compression library", 6 | "homepage": "https://www.zlib.net/" 7 | } 8 | -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | testpaths = pyogrio/tests 3 | 4 | markers = 5 | network: marks tests that require a network connection 6 | requires_arrow_write_api: marks tests that require the Arrow write API 7 | -------------------------------------------------------------------------------- /ci/envs/latest.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - numpy 6 | - libgdal-core 7 | - libgdal-arrow-parquet 8 | - pytest 9 | - shapely>=2 10 | - geopandas-base 11 | - pyarrow-core 12 | -------------------------------------------------------------------------------- /pyogrio/_vsi.pxd: -------------------------------------------------------------------------------- 1 | cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver) 2 
| cdef str read_buffer_to_vsimem(bytes bytes_buffer) 3 | cdef read_vsimem_to_buffer(str path, object out_buffer) 4 | cpdef vsimem_rmtree_toplevel(str path) 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | # Check for updates to GitHub Actions every week 8 | interval: "weekly" 9 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: pyogrio 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python==3.10.* 6 | - gdal 7 | - numpy==1.24.* 8 | - numpydoc==1.1.* 9 | - Cython==0.29.* 10 | - docutils==0.16.* 11 | - myst-parser 12 | - pip 13 | - pip: 14 | - sphinx_rtd_theme 15 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | formats: [] 3 | 4 | build: 5 | os: "ubuntu-22.04" 6 | tools: 7 | python: "mambaforge-4.10" 8 | 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | conda: 13 | environment: docs/environment.yml 14 | 15 | python: 16 | install: 17 | - path: . 
18 | -------------------------------------------------------------------------------- /ci/custom-triplets/x64-linux-dynamic-release.cmake: -------------------------------------------------------------------------------- 1 | set(VCPKG_TARGET_ARCHITECTURE x64) 2 | set(VCPKG_CRT_LINKAGE dynamic) 3 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) 4 | set(VCPKG_BUILD_TYPE release) 5 | 6 | set(VCPKG_LIBRARY_LINKAGE static) 7 | if(PORT MATCHES "gdal") 8 | set(VCPKG_LIBRARY_LINKAGE dynamic) 9 | endif() 10 | -------------------------------------------------------------------------------- /ci/custom-triplets/arm64-linux-dynamic-release.cmake: -------------------------------------------------------------------------------- 1 | set(VCPKG_TARGET_ARCHITECTURE arm64) 2 | set(VCPKG_CRT_LINKAGE dynamic) 3 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) 4 | set(VCPKG_BUILD_TYPE release) 5 | 6 | set(VCPKG_LIBRARY_LINKAGE static) 7 | if(PORT MATCHES "gdal") 8 | set(VCPKG_LIBRARY_LINKAGE dynamic) 9 | endif() 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: "v0.11.5" 4 | hooks: 5 | - id: ruff-format 6 | - id: ruff 7 | - repo: https://github.com/MarcoGorelli/cython-lint 8 | rev: v0.16.6 9 | hooks: 10 | - id: cython-lint 11 | - id: double-quote-cython-strings -------------------------------------------------------------------------------- /environment-dev.yml: -------------------------------------------------------------------------------- 1 | name: pyogrio-dev 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | # Required 6 | - libgdal-core 7 | - numpy 8 | - shapely>=2 9 | # Optional 10 | - geopandas-base 11 | - pyarrow 12 | - pyproj 13 | # Specific for dev 14 | - cython 15 | - pre-commit 16 | - pytest 17 | - ruff==0.11.5 18 | - versioneer 19 | 
-------------------------------------------------------------------------------- /ci/custom-triplets/x64-osx-dynamic-release.cmake: -------------------------------------------------------------------------------- 1 | set(VCPKG_TARGET_ARCHITECTURE x64) 2 | set(VCPKG_CRT_LINKAGE dynamic) 3 | set(VCPKG_CMAKE_SYSTEM_NAME Darwin) 4 | set(VCPKG_BUILD_TYPE release) 5 | set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") 6 | 7 | set(VCPKG_LIBRARY_LINKAGE static) 8 | if(PORT MATCHES "gdal") 9 | set(VCPKG_LIBRARY_LINKAGE dynamic) 10 | endif() 11 | -------------------------------------------------------------------------------- /ci/custom-triplets/arm64-osx-dynamic-release.cmake: -------------------------------------------------------------------------------- 1 | set(VCPKG_TARGET_ARCHITECTURE arm64) 2 | set(VCPKG_CRT_LINKAGE dynamic) 3 | set(VCPKG_CMAKE_SYSTEM_NAME Darwin) 4 | set(VCPKG_OSX_ARCHITECTURES arm64) 5 | set(VCPKG_BUILD_TYPE release) 6 | set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") 7 | 8 | set(VCPKG_LIBRARY_LINKAGE static) 9 | if(PORT MATCHES "gdal") 10 | set(VCPKG_LIBRARY_LINKAGE dynamic) 11 | endif() 12 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | Linting: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v6 16 | - uses: actions/setup-python@v6 17 | with: 18 | python-version: "3.11" 19 | - uses: pre-commit/action@v3.0.1 20 | -------------------------------------------------------------------------------- /pyogrio/_err.pxd: -------------------------------------------------------------------------------- 1 | cdef object check_last_error() 2 | cdef int check_int(int retval) except -1 3 | cdef void *check_pointer(void *ptr) except NULL 4 | 5 | cdef class ErrorHandler: 6 | cdef object 
error_stack 7 | cdef int check_int(self, int retval, bint squash_errors) except -1 8 | cdef void *check_pointer(self, void *ptr, bint squash_errors) except NULL 9 | cdef void _handle_error_stack(self, bint squash_errors) 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.pyc 3 | *.pyd 4 | *.so 5 | *.dll 6 | 7 | */__pycache__/* 8 | .benchmarks/* 9 | .coverage 10 | .pytest_cache/* 11 | pip-wheel-metadata/* 12 | 13 | Pipfile.lock 14 | 15 | benchmarks/fixtures/* 16 | 17 | .libs 18 | 19 | docs/build 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | venv*/ 25 | build/ 26 | dist/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | .ruff_cache -------------------------------------------------------------------------------- /ci/envs/nightly-deps.yml: -------------------------------------------------------------------------------- 1 | name: test-nightly-deps 2 | channels: 3 | - gdal-master 4 | - conda-forge 5 | dependencies: 6 | - libgdal-core 7 | - pytest 8 | - pandas 9 | - pip 10 | - pip: 11 | - --pre --prefer-binary --index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --extra-index-url https://pypi.fury.io/arrow-nightlies/ --extra-index-url https://pypi.org/simple 12 | - geopandas 13 | - numpy>=2.0.0.dev 14 | - shapely 15 | # - pandas 16 | - pyarrow -------------------------------------------------------------------------------- /ci/requirements-wheel-test.txt: -------------------------------------------------------------------------------- 1 | # pytest 8.0 gives permission error (https://github.com/pytest-dev/pytest/issues/11904) 2 | pytest<8 3 | # dependencies of geopandas (installed separately with --no-deps to avoid fiona) 4 | pandas 5 | pyproj ; (python_version < '3.12') or 
(python_full_version >= '3.12.1') 6 | shapely>=2 ; (python_version < '3.12') or (python_full_version >= '3.12.1') 7 | packaging 8 | # optional test dependencies 9 | pyarrow ; (python_version < '3.12') or (python_full_version >= '3.12.1') 10 | -------------------------------------------------------------------------------- /ci/vcpkg-manylinux2014.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pyogrio", 3 | "version": "0.12.0", 4 | "dependencies": [ 5 | { 6 | "name": "libspatialite", 7 | "default-features": false 8 | }, 9 | { 10 | "name": "gdal", 11 | "default-features": false, 12 | "features": ["recommended-features", "curl", "geos", "iconv", "libspatialite", "openssl"] 13 | } 14 | ], 15 | "builtin-baseline": "da096fdc67db437bee863ae73c4c12e289f82789" 16 | } 17 | -------------------------------------------------------------------------------- /ci/vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pyogrio", 3 | "version": "0.12.0", 4 | "dependencies": [ 5 | { 6 | "name": "libspatialite", 7 | "default-features": false 8 | }, 9 | { 10 | "name": "gdal", 11 | "default-features": false, 12 | "features": [ 13 | "recommended-features", "curl", "geos", "iconv", "libkml", "libspatialite", "openssl" 14 | ] 15 | } 16 | ], 17 | "builtin-baseline": "da096fdc67db437bee863ae73c4c12e289f82789" 18 | } 19 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API reference 2 | ============= 3 | 4 | Core 5 | ---- 6 | 7 | .. automodule:: pyogrio 8 | :members: list_drivers, detect_write_driver, list_layers, read_bounds, read_info, set_gdal_config_options, get_gdal_config_option, vsi_listtree, vsi_rmtree, vsi_unlink, __gdal_version__, __gdal_version_string__ 9 | 10 | GeoPandas integration 11 | --------------------- 12 | 13 | .. 
autofunction:: pyogrio.read_dataframe 14 | .. autofunction:: pyogrio.write_dataframe 15 | 16 | Arrow integration 17 | ----------------- 18 | 19 | .. autofunction:: pyogrio.read_arrow 20 | .. autofunction:: pyogrio.open_arrow 21 | .. autofunction:: pyogrio.write_arrow 22 | -------------------------------------------------------------------------------- /ci/vcpkg-custom-ports/zlib/0002-skip-building-examples.patch: -------------------------------------------------------------------------------- 1 | diff --git a/CMakeLists.txt b/CMakeLists.txt 2 | index ee25365..40a1936 100644 3 | --- a/CMakeLists.txt 4 | +++ b/CMakeLists.txt 5 | @@ -171,6 +171,7 @@ endif() 6 | # Example binaries 7 | #============================================================================ 8 | 9 | +if (0) 10 | add_executable(example test/example.c) 11 | target_link_libraries(example zlib) 12 | add_test(example example) 13 | @@ -188,3 +189,4 @@ if(HAVE_OFF64_T) 14 | target_link_libraries(minigzip64 zlib) 15 | set_target_properties(minigzip64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64") 16 | endif() 17 | +endif() 18 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /pyogrio/errors.py: -------------------------------------------------------------------------------- 1 | """Custom errors.""" 2 | 3 | 4 | class DataSourceError(RuntimeError): 5 | """Errors relating to opening or closing an OGRDataSource (with >= 1 layers).""" 6 | 7 | 8 | class DataLayerError(RuntimeError): 9 | """Errors relating to working with a single OGRLayer.""" 10 | 11 | 12 | class CRSError(DataLayerError): 13 | """Errors relating to getting or setting CRS values.""" 14 | 15 | 16 | class FeatureError(DataLayerError): 17 | """Errors related to reading or writing a feature.""" 18 | 19 | 20 | class GeometryError(DataLayerError): 21 | """Errors relating to getting or setting a geometry field.""" 22 | 23 | 24 | class FieldError(DataLayerError): 25 | """Errors relating to getting or setting a non-geometry field.""" 26 | -------------------------------------------------------------------------------- /ci/vcpkg-custom-ports/zlib/vcpkg-cmake-wrapper.cmake: -------------------------------------------------------------------------------- 1 | find_path(ZLIB_INCLUDE_DIR NAMES zlib.h PATHS "${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/include" NO_DEFAULT_PATH) 2 | find_library(ZLIB_LIBRARY_RELEASE NAMES zlib z PATHS "${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib" NO_DEFAULT_PATH) 3 | find_library(ZLIB_LIBRARY_DEBUG NAMES zlibd z PATHS "${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/debug/lib" NO_DEFAULT_PATH) 4 | if(NOT ZLIB_INCLUDE_DIR OR NOT (ZLIB_LIBRARY_RELEASE OR ZLIB_LIBRARY_DEBUG)) 5 | message(FATAL_ERROR "Broken installation of vcpkg port zlib") 6 | endif() 7 | if(CMAKE_VERSION VERSION_LESS 3.4) 8 | include(SelectLibraryConfigurations) 9 | select_library_configurations(ZLIB) 10 | unset(ZLIB_FOUND) 11 | endif() 12 | _find_package(${ARGS}) 13 | 
-------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /ci/vcpkg-custom-ports/zlib/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any damages 5 | arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any purpose, 8 | including commercial applications, and to alter it and redistribute it 9 | freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must not 12 | claim that you wrote the original software. 
If you use this software 13 | in a product, an acknowledgment in the product documentation would be 14 | appreciated but is not required. 15 | 2. Altered source versions must be plainly marked as such, and must not be 16 | misrepresented as being the original software. 17 | 3. This notice may not be removed or altered from any source distribution. 18 | 19 | Jean-loup Gailly Mark Adler 20 | jloup@gzip.org madler@alumni.caltech.edu -------------------------------------------------------------------------------- /docs/source/errors.md: -------------------------------------------------------------------------------- 1 | # Error handling 2 | 3 | Pyogrio tries to capture and wrap errors from GDAL/OGR where possible, but defers 4 | to error messages emitted by GDAL where available. The error types below are 5 | intended to help assist in determining the source of the error in case the 6 | error message is a bit cryptic. 7 | 8 | Some of the errors that may be emitted by pyogrio include: 9 | 10 | - `ValueError` / `TypeError`: indicates that a user-provided is invalid for a particular 11 | operation 12 | - `DataSourceError`: indicates an error opening or using a transaction against a data source 13 | - `DataLayerError`: indicates an error obtaining a data layer or its properties (subclassed by all of following) 14 | - `CRSError`: indicates an error reading or writing CRS information 15 | - `FeatureError`: indicates an error reading or writing a specific feature 16 | - `GeometryError`: indicates an error reading or writing a geometry field of a single feature 17 | - `FieldError`: indicates an error reading or writing a non-geometry field of a single feature 18 | 19 | All the pyogrio specific errors are subclasses of `RuntimeError`. 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2024 Brendan C. 
Ward and pyogrio contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | 4 | ## Benchmark Data 5 | 6 | Test datasets are downloaded and placed into `tests/fixtures/datasets` (each gets its own folder): 7 | 8 | [Natural Earth](https://www.naturalearthdata.com/downloads/): 9 | 10 | - [Admin 0 (countries) at 1:110m](https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip) 11 | - [Admin 0 (countries at 1:10m)](https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip) 12 | - [Admin 1 (states / provinces) at 1:110m](https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_1_states_provinces.zip) 13 | - [Admin 1 (states / provinces) at 1:10m](https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip) 14 | 15 | Hydrography: 16 | 17 | - [Watershed boundaries](https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/HU2/GDB/WBD_17_HU2_GDB.zip) 18 | - [Flowlines, waterbodies, etc](https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_1704_HU4_GDB.zip) 19 | -------------------------------------------------------------------------------- /ci/vcpkg-custom-ports/zlib/portfile.cmake: -------------------------------------------------------------------------------- 1 | set(VERSION 1.2.5.2) 2 | 3 | vcpkg_download_distfile(ARCHIVE_FILE 4 | URLS "http://zlib.net/fossils/zlib-1.2.5.2.tar.gz" 5 | FILENAME "zlib-1.2.5.2.tar.gz" 6 | SHA512 d4bd29ebfd5642253cecb9b8364ee6de87442d192229a9080cc306b819745e80c0791bd0a8abefd0c5e11c958bc85485d5d5d051b4770e45f6f479f3bb16e867 7 | ) 8 | 9 | vcpkg_extract_source_archive_ex( 10 | OUT_SOURCE_PATH SOURCE_PATH 11 | ARCHIVE ${ARCHIVE_FILE} 12 | REF 
"""Session-scoped pytest fixtures resolving paths to local benchmark datasets.

The datasets are not bundled with the repository; they are expected to be
downloaded into the ``fixtures`` directory that sits next to this file.
"""

from pathlib import Path

import pytest

# All benchmark data lives under ./fixtures relative to this conftest.
data_dir = Path(__file__).parent.resolve() / "fixtures"


def _dataset(relative_path):
    """Resolve *relative_path* against the shared benchmark fixtures dir."""
    return data_dir / relative_path


@pytest.fixture(scope="session")
def naturalearth_lowres():
    """Natural Earth 1:110m admin-0 countries (low resolution)."""
    return _dataset("ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp")


@pytest.fixture(scope="session")
def naturalearth_modres():
    """Natural Earth 1:10m admin-0 countries (moderate resolution)."""
    return _dataset("ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp")


@pytest.fixture(scope="session")
def naturalearth_modres_vsi():
    """Zipped 1:10m admin-0 countries, addressed via GDAL's /vsizip/ handler."""
    return f"/vsizip/{_dataset('ne_10m_admin_0_countries.zip/ne_10m_admin_0_countries.shp')}"


@pytest.fixture(scope="session")
def naturalearth_lowres1():
    """Natural Earth 1:110m admin-1 states/provinces."""
    return _dataset(
        "ne_110m_admin_1_states_provinces/ne_110m_admin_1_states_provinces.shp"
    )


@pytest.fixture(scope="session")
def naturalearth_modres1():
    """Natural Earth 1:10m admin-1 states/provinces."""
    return _dataset(
        "ne_10m_admin_1_states_provinces/ne_10m_admin_1_states_provinces.shp"
    )


@pytest.fixture(scope="session")
def nhd_wbd():
    """USGS Watershed Boundary Dataset (region 17) FileGDB."""
    return _dataset("WBD_17_HU2_GDB/WBD_17_HU2_GDB.gdb")


@pytest.fixture(scope="session")
def nhd_hr():
    """USGS NHDPlus High Resolution (HU4 1704) FileGDB."""
    return _dataset("NHDPLUS_H_1704_HU4_GDB/NHDPLUS_H_1704_HU4_GDB.gdb")
23 | 24 | ```{toctree} 25 | --- 26 | maxdepth: 2 27 | caption: Contents 28 | --- 29 | 30 | about 31 | concepts 32 | supported_formats 33 | install 34 | introduction 35 | api 36 | errors 37 | known_issues 38 | ``` 39 | -------------------------------------------------------------------------------- /ci/manylinux_2_28_x86_64-vcpkg-gdal.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/pypa/manylinux_2_28_x86_64:2025.09.19-1 2 | 3 | # building openssl needs IPC-Cmd (https://github.com/microsoft/vcpkg/issues/24988) 4 | RUN dnf -y install curl zip unzip tar ninja-build perl-IPC-Cmd 5 | 6 | RUN git clone https://github.com/Microsoft/vcpkg.git /opt/vcpkg && \ 7 | git -C /opt/vcpkg checkout da096fdc67db437bee863ae73c4c12e289f82789 8 | 9 | ENV VCPKG_INSTALLATION_ROOT="/opt/vcpkg" 10 | ENV PATH="${PATH}:/opt/vcpkg" 11 | 12 | ENV VCPKG_DEFAULT_TRIPLET="x64-linux-dynamic-release" 13 | 14 | # mkdir & touch -> workaround for https://github.com/microsoft/vcpkg/issues/27786 15 | RUN bootstrap-vcpkg.sh && \ 16 | mkdir -p /root/.vcpkg/ $HOME/.vcpkg && \ 17 | touch /root/.vcpkg/vcpkg.path.txt $HOME/.vcpkg/vcpkg.path.txt && \ 18 | vcpkg integrate install && \ 19 | vcpkg integrate bash 20 | 21 | COPY ci/custom-triplets/x64-linux-dynamic-release.cmake opt/vcpkg/custom-triplets/x64-linux-dynamic-release.cmake 22 | COPY ci/vcpkg.json opt/vcpkg/ 23 | 24 | ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/vcpkg/installed/x64-linux-dynamic-release/lib" 25 | RUN vcpkg install --overlay-triplets=opt/vcpkg/custom-triplets \ 26 | --feature-flags="versions,manifests" \ 27 | --x-manifest-root=opt/vcpkg \ 28 | --x-install-root=opt/vcpkg/installed && \ 29 | vcpkg list 30 | 31 | # setting git safe directory is required for properly building wheels when 32 | # git >= 2.35.3 33 | RUN git config --global --add safe.directory "*" 34 | -------------------------------------------------------------------------------- /pyogrio/__init__.py: 
"""Vectorized vector I/O using OGR."""

try:
    # Import shapely first so it can load its own GEOS copy before GDAL
    # (and the GEOS it links against) is loaded by the imports below.
    import shapely

    # For shapely < 2, importing shapely.geos is what actually loads GEOS.
    # Compare the major version numerically: the previous plain string
    # comparison (shapely.__version__ < "2.0.0") misorders versions
    # lexicographically (e.g. "10.0.0" < "2.0.0" is True).
    if int(shapely.__version__.split(".")[0]) < 2:
        import shapely.geos
except Exception:
    # Best-effort only: pyogrio also works without shapely installed.
    pass

from pyogrio._version import get_versions
from pyogrio.core import (
    __gdal_geos_version__,
    __gdal_version__,
    __gdal_version_string__,
    detect_write_driver,
    get_gdal_config_option,
    get_gdal_data_path,
    list_drivers,
    list_layers,
    read_bounds,
    read_info,
    set_gdal_config_options,
    vsi_listtree,
    vsi_rmtree,
    vsi_unlink,
)
from pyogrio.geopandas import read_dataframe, write_dataframe
from pyogrio.raw import open_arrow, read_arrow, write_arrow

__version__ = get_versions()["version"]
del get_versions

# Public API of the package.
__all__ = [
    "__gdal_geos_version__",
    "__gdal_version__",
    "__gdal_version_string__",
    "__version__",
    "detect_write_driver",
    "get_gdal_config_option",
    "get_gdal_data_path",
    "list_drivers",
    "list_layers",
    "open_arrow",
    "read_arrow",
    "read_bounds",
    "read_dataframe",
    "read_info",
    "set_gdal_config_options",
    "vsi_listtree",
    "vsi_rmtree",
    "vsi_unlink",
    "write_arrow",
    "write_dataframe",
]
Vector data sources are those that contain geometries (points, lines, 8 | or polygons) and associated columns of data. 9 | 10 | We refer to GDAL / OGR interchangeably throughout the documentation. 11 | 12 | ## OGR vector data source 13 | 14 | An OGR vector data source is a container file format, it may contain one or 15 | several spatial and / or nonspatial data layers or tables depending on its type. 16 | 17 | For example, a GeoPackage may contain several spatial data layers. In contrast, 18 | an ESRI Shapefile always consists of a single data layer. 19 | 20 | ## OGR vector data layer 21 | 22 | An OGR vector data layer is a single entity within a vector data source, and may 23 | have 0 or more records and may or may not include a geometry column, depending 24 | on the data layer type. 25 | 26 | ## OGR vector driver 27 | 28 | An OGR vector driver is implemented directly within the GDAl / OGR library, and 29 | is what ultimately provides the ability to read or write a specific vector data 30 | source format. GDAL is typically distributed with drivers enabled for some of 31 | the most common vector formats, whereas others are opt-in and included only 32 | within specific distributions of GDAL or if you compile it yourself. 33 | 34 | See the [list of drivers](https://gdal.org/drivers/vector/index.html) for 35 | more information. 36 | -------------------------------------------------------------------------------- /docs/source/supported_formats.md: -------------------------------------------------------------------------------- 1 | # Supported vector formats 2 | 3 | Support for reading and writing spatial data ultimately depends on what is 4 | available in your particular distribution of GDAL. GDAL supports reading from 5 | a wide number of vector file formats, and writing for a much smaller number. 6 | 7 | Please see the [list of drivers](https://gdal.org/drivers/vector/index.html) for 8 | more information. 
9 | 10 | ## Full read and write support 11 | 12 | - [ESRI Shapefile](https://gdal.org/drivers/vector/shapefile.html) 13 | - [GeoPackage](https://gdal.org/drivers/vector/gpkg.html) 14 | - [GeoJSON](https://gdal.org/drivers/vector/geojson.html) / [GeoJSONSeq](https://gdal.org/drivers/vector/geojsonseq.html) 15 | - [FlatGeobuf](https://gdal.org/drivers/vector/flatgeobuf.html) (requires GDAL >= 3.1) 16 | 17 | ## Read support 18 | 19 | - [ESRI FileGDB (via OpenFileGDB)](https://gdal.org/drivers/vector/openfilegdb.html#vector-openfilegdb) 20 | - above formats using the [Virtual File System](https://gdal.org/user/virtual_file_systems.html#virtual-file-systems), which supports zipped data sources and directories 21 | 22 | ## Support for other formats 23 | 24 | Other vector formats that are registered within your particular installation of 25 | GDAL may be supported. Please be aware that these likely have not been tested 26 | for compatibility with Pyogrio and you may encounter specific issues with these 27 | formats and / or their constituent geometry or field data types. 28 | 29 | We are unlikely to support obscure, rarely-used, proprietary vector formats, 30 | especially if they require advanced GDAL installation procedures. 
31 | -------------------------------------------------------------------------------- /ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/pypa/manylinux_2_28_aarch64:2025.09.19-1 2 | 3 | # building openssl needs IPC-Cmd (https://github.com/microsoft/vcpkg/issues/24988) 4 | RUN dnf -y install curl zip unzip tar ninja-build perl-IPC-Cmd 5 | 6 | RUN git clone https://github.com/Microsoft/vcpkg.git /opt/vcpkg && \ 7 | git -C /opt/vcpkg checkout da096fdc67db437bee863ae73c4c12e289f82789 8 | 9 | ENV VCPKG_INSTALLATION_ROOT="/opt/vcpkg" 10 | ENV PATH="${PATH}:/opt/vcpkg" 11 | 12 | ENV VCPKG_DEFAULT_TRIPLET="arm64-linux-dynamic-release" 13 | # pkgconf fails to build with default debug mode of arm64-linux host 14 | ENV VCPKG_DEFAULT_HOST_TRIPLET="arm64-linux-release" 15 | 16 | # Must be set when building on arm 17 | ENV VCPKG_FORCE_SYSTEM_BINARIES=1 18 | 19 | # mkdir & touch -> workaround for https://github.com/microsoft/vcpkg/issues/27786 20 | RUN bootstrap-vcpkg.sh && \ 21 | mkdir -p /root/.vcpkg/ $HOME/.vcpkg && \ 22 | touch /root/.vcpkg/vcpkg.path.txt $HOME/.vcpkg/vcpkg.path.txt && \ 23 | vcpkg integrate install && \ 24 | vcpkg integrate bash 25 | 26 | COPY ci/custom-triplets/arm64-linux-dynamic-release.cmake opt/vcpkg/custom-triplets/arm64-linux-dynamic-release.cmake 27 | COPY ci/vcpkg.json opt/vcpkg/ 28 | 29 | ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/vcpkg/installed/arm64-linux-dynamic-release/lib" 30 | RUN vcpkg install --overlay-triplets=opt/vcpkg/custom-triplets \ 31 | --feature-flags="versions,manifests" \ 32 | --x-manifest-root=opt/vcpkg \ 33 | --x-install-root=opt/vcpkg/installed && \ 34 | vcpkg list 35 | 36 | # setting git safe directory is required for properly building wheels when 37 | # git >= 2.35.3 38 | RUN git config --global --add safe.directory "*" 39 | -------------------------------------------------------------------------------- /pyogrio/_compat.py: 
"""Feature flags for optional dependencies and minimum library versions."""

from packaging.version import Version

from pyogrio.core import __gdal_geos_version__, __gdal_version__

# Detect optional dependencies; each name is None when not installed.
try:
    import pyarrow
except ImportError:
    pyarrow = None

try:
    import pyproj
except ImportError:
    pyproj = None

try:
    import shapely
except ImportError:
    shapely = None

try:
    import geopandas
except ImportError:
    geopandas = None

try:
    import pandas
except ImportError:
    pandas = None


def _at_least(module, minimum):
    """Return True if *module* is importable and its version is >= *minimum*."""
    return module is not None and Version(module.__version__) >= Version(minimum)


HAS_ARROW_WRITE_API = __gdal_version__ >= (3, 8, 0)
HAS_PYARROW = pyarrow is not None
HAS_PYPROJ = pyproj is not None
PYARROW_GE_19 = _at_least(pyarrow, "19.0.0")

HAS_GEOPANDAS = geopandas is not None

PANDAS_GE_15 = _at_least(pandas, "1.5.0")
PANDAS_GE_20 = _at_least(pandas, "2.0.0")
PANDAS_GE_22 = _at_least(pandas, "2.2.0")
PANDAS_GE_23 = _at_least(pandas, "2.3.0")
PANDAS_GE_30 = _at_least(pandas, "3.0.0dev")

GDAL_GE_37 = __gdal_version__ >= (3, 7, 0)
GDAL_GE_38 = __gdal_version__ >= (3, 8, 0)
GDAL_GE_311 = __gdal_version__ >= (3, 11, 0)

HAS_GDAL_GEOS = __gdal_geos_version__ is not None

# HAS_SHAPELY deliberately requires shapely >= 2 (earlier versions lack the
# vectorized API pyogrio relies on).
HAS_SHAPELY = _at_least(shapely, "2.0.0")
SHAPELY_GE_21 = _at_least(shapely, "2.1.0")
on Windows directories in PATH are not automatically 2 | # searched for DLL dependencies and must be added manually with 3 | # os.add_dll_directory. 4 | # adapted from Fiona: https://github.com/Toblerity/Fiona/pull/875 5 | 6 | 7 | import logging 8 | import os 9 | import platform 10 | from contextlib import contextmanager 11 | from pathlib import Path 12 | 13 | log = logging.getLogger(__name__) 14 | log.addHandler(logging.NullHandler()) 15 | 16 | 17 | try: 18 | # set GDAL_CURL_CA_BUNDLE / PROJ_CURL_CA_BUNDLE for GDAL >= 3.2 19 | import certifi 20 | 21 | ca_bundle = certifi.where() 22 | os.environ.setdefault("GDAL_CURL_CA_BUNDLE", ca_bundle) 23 | os.environ.setdefault("PROJ_CURL_CA_BUNDLE", ca_bundle) 24 | except ImportError: 25 | pass 26 | 27 | 28 | gdal_dll_dir = None 29 | 30 | if platform.system() == "Windows": 31 | # if loading of extension modules fails, search for gdal dll directory 32 | try: 33 | import pyogrio._io # noqa: F401 34 | 35 | except ImportError: 36 | for path in os.getenv("PATH", "").split(os.pathsep): 37 | if list(Path(path).glob("gdal*.dll")): 38 | log.info(f"Found GDAL at {path}") 39 | gdal_dll_dir = path 40 | break 41 | 42 | if not gdal_dll_dir: 43 | raise ImportError( 44 | "GDAL DLL could not be found. It must be on the system PATH." 
@contextmanager
def GDALEnv():
    """Yield a context in which the GDAL DLL directory (if any) is searchable.

    On Windows, ``gdal_dll_dir`` (discovered at import time) is registered via
    ``os.add_dll_directory`` for the duration of the context; on other
    platforms, or when no directory was found, this is a no-op.
    """
    handle = None
    if gdal_dll_dir:
        handle = os.add_dll_directory(gdal_dll_dir)
    try:
        yield None
    finally:
        # Unregister the DLL directory even if the body raised.
        if handle is not None:
            handle.close()
34 | vcpkg list 35 | 36 | # setting git safe directory is required for properly building wheels when 37 | # git >= 2.35.3 38 | RUN git config --global --add safe.directory "*" 39 | -------------------------------------------------------------------------------- /docs/source/about.md: -------------------------------------------------------------------------------- 1 | # About 2 | 3 | ## How it works 4 | 5 | Internally, Pyogrio uses a numpy-oriented approach in Cython to read 6 | information about data sources and records from spatial data layers. Geometries 7 | are extracted from the data layer as Well-Known Binary (WKB) objects and fields 8 | (attributes) are read into numpy arrays of the appropriate data type. These are 9 | then converted to GeoPandas `GeoDataFrame`s. 10 | 11 | All records are read into memory, which may be problematic for very large data 12 | sources. You can use `skip_features` / `max_features` to read smaller parts of 13 | the file at a time. 14 | 15 | The entire `GeoDataFrame` is written at once. Incremental writes or appends to 16 | existing data sources are not supported. 17 | 18 | ## Comparison to Fiona 19 | 20 | [Fiona](https://github.com/Toblerity/Fiona) is a full-featured Python library 21 | for working with OGR vector data sources. It is **awesome**, has highly-dedicated 22 | maintainers and contributors, and exposes more functionality than Pyogrio ever will. 23 | This project would not be possible without Fiona having come first. 24 | 25 | Pyogrio uses a bulk-oriented approach for reading and writing 26 | spatial vector file formats, which enables faster I/O operations. It borrows 27 | from the internal mechanics and lessons learned of Fiona. It uses a stateless 28 | approach to reading or writing data; all data are read or written in a single 29 | pass. 30 | 31 | `Fiona` is a general-purpose spatial format I/O library that is used within many 32 | projects in the Python ecosystem. 
"""Tests for the /vsimem/ helper utilities in pyogrio.util."""

from pathlib import Path

from pyogrio import vsi_listtree, vsi_unlink
from pyogrio.raw import read, write
from pyogrio.util import vsimem_rmtree_toplevel

import pytest


def test_vsimem_rmtree_toplevel(naturalearth_lowres):
    """Removing one file's top-level /vsimem/ dir leaves sibling trees intact."""
    # Prepare test data in /vsimem/
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta["spatial_index"] = False
    meta["geometry_type"] = "MultiPolygon"
    test_dir_path = Path(f"/vsimem/test/{naturalearth_lowres.stem}.gpkg")
    test_dir2_path = Path(f"/vsimem/test2/test2/{naturalearth_lowres.stem}.gpkg")

    write(test_dir_path, geometry, field_data, **meta)
    write(test_dir2_path, geometry, field_data, **meta)

    # Check if everything was created properly with listtree
    files = vsi_listtree("/vsimem/")
    assert test_dir_path.as_posix() in files
    assert test_dir2_path.as_posix() in files

    # Test deleting parent dir of file in single directory
    vsimem_rmtree_toplevel(test_dir_path)
    files = vsi_listtree("/vsimem/")
    assert test_dir_path.parent.as_posix() not in files
    assert test_dir2_path.as_posix() in files

    # Test deleting top-level dir of file in a subdirectory
    vsimem_rmtree_toplevel(test_dir2_path)
    assert test_dir2_path.as_posix() not in vsi_listtree("/vsimem/")


def test_vsimem_rmtree_toplevel_error(naturalearth_lowres):
    """vsimem_rmtree_toplevel on a missing path raises and deletes nothing."""
    # Prepare test data in /vsimem
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta["spatial_index"] = False
    meta["geometry_type"] = "MultiPolygon"
    test_file_path = Path(f"/vsimem/pyogrio_test_{naturalearth_lowres.stem}.gpkg")

    write(test_file_path, geometry, field_data, **meta)
    assert test_file_path.as_posix() in vsi_listtree("/vsimem/")

    # Deleting parent dir of non-existent file should raise an error.
    with pytest.raises(FileNotFoundError, match="Path does not exist"):
        vsimem_rmtree_toplevel("/vsimem/test/non-existent.gpkg")

    # File should still be there
    assert test_file_path.as_posix() in vsi_listtree("/vsimem/")

    # Cleanup.
    vsi_unlink(test_file_path)
    # BUG FIX: vsi_listtree returns strings, so compare the posix string.
    # The original compared the Path object itself, which can never equal a
    # string, making the assertion vacuously true.
    assert test_file_path.as_posix() not in vsi_listtree("/vsimem/")
steps: 45 | - name: Checkout repo 46 | uses: actions/checkout@v6 47 | 48 | - name: Install Conda environment with Micromamba 49 | uses: mamba-org/setup-micromamba@v2 50 | with: 51 | environment-file: ci/envs/${{ matrix.env }}.yml 52 | create-args: >- 53 | python=${{ matrix.python }} 54 | ${{ matrix.extra }} 55 | 56 | - name: Set environment variables (Windows) 57 | if: runner.os == 'Windows' 58 | run: | 59 | echo "GDAL_INCLUDE_PATH=$MAMBA_ROOT_PREFIX/envs/test/Library/include." >> $GITHUB_ENV 60 | echo "GDAL_LIBRARY_PATH=$MAMBA_ROOT_PREFIX/envs/test/Library/lib" >> $GITHUB_ENV 61 | echo "GDAL_VERSION=$(gdalinfo --version | awk '{print $2}' | tr -d ",")" >> $GITHUB_ENV 62 | 63 | - name: Install pyogrio 64 | run: pip install -e . 65 | 66 | - name: Test 67 | env: 68 | PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }} 69 | run: | 70 | pytest -v --color=yes -r s pyogrio/tests 71 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | import sphinx_rtd_theme 17 | from pyogrio import __version__ 18 | 19 | 20 | autodoc_mock_imports = [ 21 | "geopandas", 22 | ] 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = "pyogrio" 27 | copyright = "2020-2021 Brendan C. Ward and pyogrio contributors" 28 | author = "Brendan C. Ward and pyogrio contributors" 29 | 30 | # The full version, including alpha/beta/rc tags 31 | release = __version__ 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 39 | extensions = [ 40 | "sphinx.ext.autodoc", 41 | "numpydoc", 42 | "sphinx.ext.autosummary", 43 | "sphinx_rtd_theme", 44 | "myst_parser", 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ["_templates"] 49 | 50 | # List of patterns, relative to source directory, that match files and 51 | # directories to ignore when looking for source files. 52 | # This pattern also affects html_static_path and html_extra_path. 53 | exclude_patterns = [] 54 | 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | 58 | # The theme to use for HTML and HTML Help pages. See the documentation for 59 | # a list of builtin themes. 60 | # 61 | html_theme = "sphinx_rtd_theme" 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 
66 | html_static_path = ["_static"] 67 | -------------------------------------------------------------------------------- /.github/workflows/docker-gdal.yml: -------------------------------------------------------------------------------- 1 | name: Docker GDAL Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | # cancel running jobs on new commit to PR 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | TestLinux: 17 | name: GDAL ${{ matrix.container }} 18 | runs-on: ubuntu-latest 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | container: 23 | - "ghcr.io/osgeo/gdal:ubuntu-small-latest" # >= python 3.12.3 24 | - "ghcr.io/osgeo/gdal:ubuntu-small-3.10.0" # python 3.12.3 25 | - "ghcr.io/osgeo/gdal:ubuntu-small-3.9.2" # python 3.12.3 26 | - "ghcr.io/osgeo/gdal:ubuntu-small-3.8.5" # python 3.10.12 27 | - "ghcr.io/osgeo/gdal:ubuntu-small-3.7.3" # python 3.10.12 28 | - "ghcr.io/osgeo/gdal:ubuntu-small-3.6.4" # python 3.10.6 29 | 30 | container: 31 | image: ${{ matrix.container }} 32 | 33 | steps: 34 | - name: Install packages 35 | run: | 36 | apt-get update && apt-get install -y build-essential git python3-dev 37 | 38 | # - name: Install Python 39 | # # the GDAL 3.4 and 3.5 images do have Python 3.8 installed, so have to 40 | # # install a more recent Python version manually 41 | # if: matrix.container == 'osgeo/gdal:ubuntu-small-3.5.3' || matrix.container == 'osgeo/gdal:ubuntu-small-3.4.3' 42 | # run: | 43 | # apt-get update && apt-get install -y software-properties-common 44 | # add-apt-repository -y ppa:deadsnakes/ppa 45 | # apt-get update && apt-get install -y python3.9-dev 46 | 47 | - uses: actions/checkout@v6 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v7 51 | 52 | - name: Create virtual environment 53 | # use uv to create a virtual environment, then add it to environment 54 | # variables so that it is 
"""Benchmarks for pyogrio metadata operations, compared against Fiona.

Each pyogrio benchmark is paired with a Fiona equivalent in the same
pytest-benchmark ``group`` so results are reported side by side. The
fixture datasets (see ``conftest.py``) must be downloaded separately.
"""

import os  # NOTE(review): ``os`` appears unused in this module — candidate for removal

import fiona
import pytest

from pyogrio import list_layers, read_bounds, read_info


def fiona_read_info(path, layer=None):
    """Read basic info for an OGR data source using Fiona.

    NOTE: the information returned by Fiona is different, so this
    isn't entirely a fair comparison.
    """
    with fiona.open(path, layer=layer) as src:
        # Accessing .meta forces Fiona to assemble driver/schema/crs info.
        src.meta


# --- list_layers ------------------------------------------------------------


@pytest.mark.benchmark(group="list-layers-single-lowres")
def test_list_layers_lowres(naturalearth_lowres, benchmark):
    benchmark(list_layers, naturalearth_lowres)


@pytest.mark.benchmark(group="list-layers-single-lowres")
def test_list_layers_fiona_lowres(naturalearth_lowres, benchmark):
    benchmark(fiona.listlayers, naturalearth_lowres)


@pytest.mark.benchmark(group="list-layers-single-modres")
def test_list_layers_modres(naturalearth_modres, benchmark):
    benchmark(list_layers, naturalearth_modres)


@pytest.mark.benchmark(group="list-layers-single-modres")
def test_list_layers_fiona_modres(naturalearth_modres, benchmark):
    benchmark(fiona.listlayers, naturalearth_modres)


@pytest.mark.benchmark(group="list-layers-nhd-hr")
def test_list_layers_multi(nhd_hr, benchmark):
    # nhd_hr is a multi-layer FileGDB, unlike the single-layer shapefiles.
    benchmark(list_layers, nhd_hr)


@pytest.mark.benchmark(group="list-layers-nhd-hr")
def test_list_layers_fiona_multi(nhd_hr, benchmark):
    benchmark(fiona.listlayers, nhd_hr)


# --- read_bounds (pyogrio only; Fiona has no direct equivalent) -------------


@pytest.mark.benchmark(group="read-bounds-lowres")
def test_read_bounds_lowres(naturalearth_lowres, benchmark):
    benchmark(read_bounds, naturalearth_lowres)


@pytest.mark.benchmark(group="read-bounds-modres")
def test_read_bounds_modres(naturalearth_modres, benchmark):
    benchmark(read_bounds, naturalearth_modres)


@pytest.mark.benchmark(group="read-bounds-nhd-hr")
def test_read_bounds_nhd_hr(nhd_hr, benchmark):
    benchmark(read_bounds, nhd_hr, layer="NHDFlowline")


# --- read_info --------------------------------------------------------------


@pytest.mark.benchmark(group="read-info-lowres")
def test_read_info_lowres(naturalearth_lowres, benchmark):
    benchmark(read_info, naturalearth_lowres)


@pytest.mark.benchmark(group="read-info-lowres")
def test_read_info_fiona_lowres(naturalearth_lowres, benchmark):
    benchmark(fiona_read_info, naturalearth_lowres)


@pytest.mark.benchmark(group="read-info-modres")
def test_read_info_modres(naturalearth_modres, benchmark):
    benchmark(read_info, naturalearth_modres)


@pytest.mark.benchmark(group="read-info-modres")
def test_read_info_fiona_modres(naturalearth_modres, benchmark):
    benchmark(fiona_read_info, naturalearth_modres)


@pytest.mark.benchmark(group="read-info-nhd-hr")
def test_read_info_nhd_hr(nhd_hr, benchmark):
    benchmark(read_info, nhd_hr, layer="NHDFlowline")


@pytest.mark.benchmark(group="read-info-nhd-hr")
def test_fiona_read_info_nhd_hr(nhd_hr, benchmark):
    benchmark(fiona_read_info, nhd_hr, layer="NHDFlowline")
[![pypi](https://img.shields.io/pypi/v/pyogrio.svg)](https://pypi.python.org/pypi/pyogrio/) 2 | [![Conda version](https://anaconda.org/conda-forge/pyogrio/badges/version.svg)](https://anaconda.org/conda-forge/pyogrio) 3 | [![Actions Status](https://github.com/geopandas/pyogrio/actions/workflows/tests-conda.yml/badge.svg?branch=main)](https://github.com/geopandas/pyogrio/actions?branch=main) 4 | [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org) 5 | 6 | # pyogrio - bulk-oriented spatial vector file I/O using GDAL/OGR 7 | 8 | Pyogrio provides fast, bulk-oriented read and write access to 9 | [GDAL/OGR](https://gdal.org/en/latest/drivers/vector/index.html) vector data 10 | sources, such as ESRI Shapefile, GeoPackage, GeoJSON, and several others. 11 | Vector data sources typically have geometries, such as points, lines, or 12 | polygons, and associated records with potentially many columns worth of data. 13 | 14 | The typical use is to read or write these data sources to/from 15 | [GeoPandas](https://github.com/geopandas/geopandas) `GeoDataFrames`. Because 16 | the geometry column is optional, reading or writing only non-spatial data is 17 | also possible. Hence, GeoPackage attribute tables, DBF files, or CSV files are 18 | also supported. 19 | 20 | Pyogrio is fast because it uses pre-compiled bindings for GDAL/OGR to read and 21 | write the data records in bulk. This approach avoids multiple steps of 22 | converting to and from Python data types within Python, so performance becomes 23 | primarily limited by the underlying I/O speed of data source drivers in 24 | GDAL/OGR. 25 | 26 | We have seen \>5-10x speedups reading files and \>5-20x speedups writing files 27 | compared to using row-per-row approaches (e.g. Fiona). 28 | 29 | Read the documentation for more information: 30 | [https://pyogrio.readthedocs.io](https://pyogrio.readthedocs.io/en/latest/). 
31 | 32 | ## Requirements 33 | 34 | Supports Python 3.10 - 3.14 and GDAL 3.6.x - 3.11.x. 35 | 36 | Reading to GeoDataFrames requires `geopandas>=0.12` with `shapely>=2`. 37 | 38 | Additionally, installing `pyarrow` in combination with GDAL 3.6+ enables 39 | a further speed-up when specifying `use_arrow=True`. 40 | 41 | ## Installation 42 | 43 | Pyogrio is currently available on 44 | [conda-forge](https://anaconda.org/conda-forge/pyogrio) 45 | and [PyPI](https://pypi.org/project/pyogrio/) 46 | for Linux, MacOS, and Windows. 47 | 48 | Please read the 49 | [installation documentation](https://pyogrio.readthedocs.io/en/latest/install.html) 50 | for more information. 51 | 52 | ## Supported vector formats 53 | 54 | Pyogrio supports most common vector data source formats (provided they are also 55 | supported by GDAL/OGR), including ESRI Shapefile, GeoPackage, GeoJSON, and 56 | FlatGeobuf. 57 | 58 | Please see the [list of supported formats](https://pyogrio.readthedocs.io/en/latest/supported_formats.html) 59 | for more information. 60 | 61 | ## Getting started 62 | 63 | Please read the [introduction](https://pyogrio.readthedocs.io/en/latest/supported_formats.html) 64 | for more information and examples to get started using Pyogrio. 65 | 66 | You can also check out the [API documentation](https://pyogrio.readthedocs.io/en/latest/api.html) 67 | for full details on using the API. 68 | 69 | ## Credits 70 | 71 | This project is made possible by the tremendous efforts of the GDAL, Fiona, and 72 | Geopandas communities. 
73 | 74 | - Core I/O methods and supporting functions adapted from [Fiona](https://github.com/Toblerity/Fiona) 75 | - Inspired by [Fiona PR](https://github.com/Toblerity/Fiona/pull/540/files) 76 | -------------------------------------------------------------------------------- /pyogrio/arrow_bridge.h: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | // This file is an extract https://github.com/apache/arrow/blob/master/cpp/src/arrow/c/abi.h 19 | // commit 9cbb8a1a626ee301cfe85905b6c18c5d880e176b (2022-06-14) 20 | // WARNING: DO NOT MODIFY the content as it would break interoperability ! 
21 | 22 | #pragma once 23 | 24 | #include 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #ifndef ARROW_C_DATA_INTERFACE 31 | #define ARROW_C_DATA_INTERFACE 32 | 33 | #define ARROW_FLAG_DICTIONARY_ORDERED 1 34 | #define ARROW_FLAG_NULLABLE 2 35 | #define ARROW_FLAG_MAP_KEYS_SORTED 4 36 | 37 | struct ArrowSchema { 38 | // Array type description 39 | const char* format; 40 | const char* name; 41 | const char* metadata; 42 | int64_t flags; 43 | int64_t n_children; 44 | struct ArrowSchema** children; 45 | struct ArrowSchema* dictionary; 46 | 47 | // Release callback 48 | void (*release)(struct ArrowSchema*); 49 | // Opaque producer-specific data 50 | void* private_data; 51 | }; 52 | 53 | struct ArrowArray { 54 | // Array data description 55 | int64_t length; 56 | int64_t null_count; 57 | int64_t offset; 58 | int64_t n_buffers; 59 | int64_t n_children; 60 | const void** buffers; 61 | struct ArrowArray** children; 62 | struct ArrowArray* dictionary; 63 | 64 | // Release callback 65 | void (*release)(struct ArrowArray*); 66 | // Opaque producer-specific data 67 | void* private_data; 68 | }; 69 | 70 | #endif // ARROW_C_DATA_INTERFACE 71 | 72 | #ifndef ARROW_C_STREAM_INTERFACE 73 | #define ARROW_C_STREAM_INTERFACE 74 | 75 | struct ArrowArrayStream { 76 | // Callback to get the stream type 77 | // (will be the same for all arrays in the stream). 78 | // 79 | // Return value: 0 if successful, an `errno`-compatible error code otherwise. 80 | // 81 | // If successful, the ArrowSchema must be released independently from the stream. 82 | int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); 83 | 84 | // Callback to get the next array 85 | // (if no error and the array is released, the stream has ended) 86 | // 87 | // Return value: 0 if successful, an `errno`-compatible error code otherwise. 88 | // 89 | // If successful, the ArrowArray must be released independently from the stream. 
90 | int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); 91 | 92 | // Callback to get optional detailed error information. 93 | // This must only be called if the last stream operation failed 94 | // with a non-0 return code. 95 | // 96 | // Return value: pointer to a null-terminated character array describing 97 | // the last error, or NULL if no description is available. 98 | // 99 | // The returned pointer is only valid until the next operation on this stream 100 | // (including release). 101 | const char* (*get_last_error)(struct ArrowArrayStream*); 102 | 103 | // Release callback: release the stream's own resources. 104 | // Note that arrays returned by `get_next` must be individually released. 105 | void (*release)(struct ArrowArrayStream*); 106 | 107 | // Opaque producer-specific data 108 | void* private_data; 109 | }; 110 | 111 | #endif // ARROW_C_STREAM_INTERFACE 112 | 113 | #ifdef __cplusplus 114 | } 115 | #endif 116 | -------------------------------------------------------------------------------- /pyogrio/tests/fixtures/README.md: -------------------------------------------------------------------------------- 1 | # Test datasets 2 | 3 | ## Obtaining / creating test datasets 4 | 5 | If a test dataset can be created in code, do that instead. If it is used in a 6 | single test, create the test dataset as part of that test. If it is used in 7 | more than a single test, add it to `pyogrio/tests/conftest.py` instead, as a 8 | function-scoped test fixture. 9 | 10 | If you need to obtain 3rd party test files: 11 | 12 | - add a section below that describes the source location and processing steps 13 | to derive that dataset 14 | - make sure the license is compatible with including in Pyogrio (public domain or open-source) 15 | and record that license below 16 | 17 | Please keep the test files no larger than necessary to use in tests. 
18 | 19 | ## Included test datasets 20 | 21 | ### Natural Earth lowres 22 | 23 | `naturalearth_lowres.shp` was copied from GeoPandas. 24 | 25 | License: public domain 26 | 27 | ### GPKG test dataset with null values 28 | 29 | `test_gpkg_nulls.gpkg` was created using Fiona backend to GeoPandas: 30 | 31 | ``` 32 | from collections import OrderedDict 33 | 34 | import fiona 35 | import geopandas as gp 36 | import numpy as np 37 | from pyogrio import write_dataframe 38 | 39 | filename = "test_gpkg_nulls.gpkg" 40 | 41 | df = gp.GeoDataFrame( 42 | { 43 | "col_bool": np.array([True, False, True], dtype="bool"), 44 | "col_int8": np.array([1, 2, 3], dtype="int8"), 45 | "col_int16": np.array([1, 2, 3], dtype="int16"), 46 | "col_int32": np.array([1, 2, 3], dtype="int32"), 47 | "col_int64": np.array([1, 2, 3], dtype="int64"), 48 | "col_uint8": np.array([1, 2, 3], dtype="uint8"), 49 | "col_uint16": np.array([1, 2, 3], dtype="uint16"), 50 | "col_uint32": np.array([1, 2, 3], dtype="uint32"), 51 | "col_uint64": np.array([1, 2, 3], dtype="uint64"), 52 | "col_float32": np.array([1.5, 2.5, 3.5], dtype="float32"), 53 | "col_float64": np.array([1.5, 2.5, 3.5], dtype="float64"), 54 | }, 55 | geometry=gp.points_from_xy([0, 1, 2], [0, 1, 2]), 56 | crs="EPSG:4326", 57 | ) 58 | 59 | write_dataframe(df, filename) 60 | 61 | # construct row with null values 62 | # Note: np.nan can only be used for float values 63 | null_row = { 64 | "type": "Fetaure", 65 | "id": 4, 66 | "properties": OrderedDict( 67 | [ 68 | ("col_bool", None), 69 | ("col_int8", None), 70 | ("col_int16", None), 71 | ("col_int32", None), 72 | ("col_int64", None), 73 | ("col_uint8", None), 74 | ("col_uint16", None), 75 | ("col_uint32", None), 76 | ("col_uint64", None), 77 | ("col_float32", np.nan), 78 | ("col_float64", np.nan), 79 | ] 80 | ), 81 | "geometry": {"type": "Point", "coordinates": (4.0, 4.0)}, 82 | } 83 | 84 | # append row with nulls to GPKG 85 | with fiona.open(filename, "a") as c: 86 | c.write(null_row) 87 | ``` 88 | 
89 | NOTE: Reading boolean values into GeoPandas using Fiona backend treats those 90 | values as `None` and column dtype as `object`; Pyogrio treats those values as 91 | `np.nan` and column dtype as `float64`. 92 | 93 | License: same as Pyogrio 94 | 95 | ### OSM PBF test 96 | 97 | This was downloaded from https://github.com/openstreetmap/OSM-binary/blob/master/resources/sample.pbf 98 | 99 | License: [Open Data Commons Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/) 100 | 101 | ### Test files for geometry types that are downgraded on read 102 | 103 | `line_zm.gpkg` was created using QGIS to digitize a LineString GPKG layer with Z and M enabled. Downgraded to LineString Z on read. 104 | `curve.gpkg` was created using QGIS to digitize a Curve GPKG layer. Downgraded to LineString on read. 105 | `curvepolygon.gpkg` was created using QGIS to digitize a CurvePolygon GPKG layer. Downgraded to Polygon on read. 106 | `multisurface.gpkg` was created using QGIS to digitize a MultiSurface GPKG layer. Downgraded to MultiPolygon on read. 107 | 108 | License: same as Pyogrio 109 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | Pyogrio wraps [GDAL](https://gdal.org/), which depends on a variety of third- 4 | party dependencies, such as [GEOS](https://libgeos.org/), 5 | [PROJ](https://proj.org/), and [CURL](https://curl.se/). These dependencies 6 | vary based on which features are enabled when GDAL is compiled and vary across 7 | platforms. 8 | 9 | Most vulnerabilities are likely to occur in these dependencies. 10 | 11 | Please see [GDAL's security policy and published advisories](https://github.com/OSGeo/gdal/security). 12 | 13 | If you know that a vulnerability originates from a third-party dependency, 14 | please report the vulnerability directly to the affected dependency. 
15 | 16 | If the vulnerability requires a modification of Pyogrio's packaging to 17 | specifically avoid linking to a known vulnerable version of a third party 18 | dependency, please report the vulnerability here as well. 19 | 20 | ## Available packages 21 | 22 | Pyogrio is available in 3 basic forms: 23 | 24 | ### Binary wheels on PyPI 25 | 26 | Binary wheels are published to [PyPI](https://pypi.org/project/pyogrio/) and 27 | are created using [VCPKG](https://vcpkg.io/en/) versions of GDAL and associated 28 | dependencies. These wheels include binary libraries of these dependencies. 29 | Because these binary wheels are specifically under our control, these are the 30 | packages where we are most concerned with ensuring that we avoid known 31 | vulnerable versions of dependencies. 32 | 33 | ### Conda-forge packages 34 | 35 | Conda packages are available on 36 | [conda-forge](https://anaconda.org/conda-forge/pyogrio). Pyogrio uses packages 37 | for GDAL and associated dependencies. Please contact the appropriate maintainers 38 | for those packages to report vulnerabilities. 39 | 40 | ### Self-compiled / local development 41 | 42 | When you build Pyogrio locally, you link it to your local version of GDAL and 43 | associated dependencies in a way that depends on how you installed GDAL and its 44 | dependencies (e.g., system package, homebrew package, etc). Please contact the 45 | appropriate maintainers of these packages to report vulnerabilities. 46 | 47 | ## Supported versions 48 | 49 | Pyogrio has not yet reached 1.0. Only the latest available version is being 50 | supported with security updates. 51 | 52 | Please see the [releases page](https://github.com/geopandas/pyogrio/releases) 53 | for the latest available release. 54 | 55 | ## Security advisories 56 | 57 | Please see the [security page](https://github.com/geopandas/pyogrio/security) 58 | for published security advisories. 
59 | 60 | ## Reporting a vulnerability 61 | 62 | To report a vulnerability in Pyogrio, please use GitHub's "Report a vulnerability" 63 | feature on the [security page](https://github.com/geopandas/pyogrio/security). 64 | 65 | ### Vulnerabilities in Pyogrio 66 | 67 | If the vulnerability is included within Pyogrio source code, please include at 68 | least the following information: 69 | 70 | - location of the vulnerability within the source code (file and expression); 71 | you can provide a URL to a line or range of lines from within GitHub 72 | - brief description of the vulnerability sufficient for project maintainers 73 | to understand the nature of the vulnerability, including conditions that 74 | will trigger the vulnerable code 75 | - a small test dataset (if possible) or detailed description of the structure 76 | and contents of a dataset that will trigger the vulnerability 77 | - operating system, Python version, and Pyogrio version you were using when 78 | you detected the vulnerability 79 | - severity of the dependency: does it pose a critical risk to system or data 80 | integrity or security, does it pose a high risk for potential loss of data, 81 | or is it an edge case that poses a minor risk only under specific 82 | circumstances? 
83 | 84 | ### Vulnerabilities in Pyogrio's dependencies 85 | 86 | If the vulnerability is included within Pyogrio's binary wheels from a 87 | third-party dependency or is linked from Pyogrio's conda-forge package, and 88 | would require a specific change in Pyogrio's packaging to avoid linking to a 89 | vulnerable version of the dependency, please include at least the following 90 | information: 91 | 92 | - a link to the published CVE or other description of the vulnerabilty 93 | - operating system, Python version, and Pyogrio versions that may be impacted 94 | - if known, the version of the dependency impacted by the vulnerability 95 | - the package of Pyogrio that is impacted by the vulnerability: binary wheel, 96 | conda-forge package, etc 97 | - if known, the range of vulnerable versions of the dependency and the version 98 | that resolves the vulnerability 99 | -------------------------------------------------------------------------------- /pyogrio/_geometry.pyx: -------------------------------------------------------------------------------- 1 | import warnings 2 | from pyogrio._ogr cimport * 3 | from pyogrio._err cimport * 4 | from pyogrio._err import CPLE_BaseError, NullPointerError 5 | from pyogrio.errors import DataLayerError, GeometryError 6 | 7 | 8 | # Mapping of OGR integer geometry types to GeoJSON type names. 
# Forward mapping: OGR integer geometry-type code -> human-readable type name.
# wkbNone intentionally maps to None (layers with no geometry column).
GEOMETRY_TYPES = {
    wkbUnknown: "Unknown",
    wkbPoint: "Point",
    wkbLineString: "LineString",
    wkbPolygon: "Polygon",
    wkbMultiPoint: "MultiPoint",
    wkbMultiLineString: "MultiLineString",
    wkbMultiPolygon: "MultiPolygon",
    wkbGeometryCollection: "GeometryCollection",
    wkbNone: None,
    wkbLinearRing: "LinearRing",
    # WARNING: Measured types are not supported in GEOS and downstream uses
    # these are stripped automatically to their corresponding 2D / 3D types
    # NOTE(review): "PointM" does not follow the "Measured X" naming used by
    # the other M types below — confirm this asymmetry is intentional.
    wkbPointM: "PointM",
    wkbLineStringM: "Measured LineString",
    wkbPolygonM: "Measured Polygon",
    wkbMultiPointM: "Measured MultiPoint",
    wkbMultiLineStringM: "Measured MultiLineString",
    wkbMultiPolygonM: "Measured MultiPolygon",
    wkbGeometryCollectionM: "Measured GeometryCollection",
    wkbPointZM: "Measured 3D Point",
    wkbLineStringZM: "Measured 3D LineString",
    wkbPolygonZM: "Measured 3D Polygon",
    wkbMultiPointZM: "Measured 3D MultiPoint",
    wkbMultiLineStringZM: "Measured 3D MultiLineString",
    wkbMultiPolygonZM: "Measured 3D MultiPolygon",
    wkbGeometryCollectionZM: "Measured 3D GeometryCollection",
    wkbPoint25D: "Point Z",
    wkbLineString25D: "LineString Z",
    wkbPolygon25D: "Polygon Z",
    wkbMultiPoint25D: "MultiPoint Z",
    wkbMultiLineString25D: "MultiLineString Z",
    wkbMultiPolygon25D: "MultiPolygon Z",
    wkbGeometryCollection25D: "GeometryCollection Z",
}

# Reverse mapping: type name -> OGR code (used when writing).
GEOMETRY_TYPE_CODES = {v: k for k, v in GEOMETRY_TYPES.items()}

# add additional aliases from 2.5D format
GEOMETRY_TYPE_CODES.update({
    "2.5D Point": wkbPoint25D,
    "2.5D LineString": wkbLineString25D,
    "2.5D Polygon": wkbPolygon25D,
    "2.5D MultiPoint": wkbMultiPoint25D,
    "2.5D MultiLineString": wkbMultiLineString25D,
    "2.5D MultiPolygon": wkbMultiPolygon25D,
    "2.5D GeometryCollection": wkbGeometryCollection25D
})

# 2.5D also represented using negative numbers not enumerated above
# (presumably the old-style wkb25DBit flag interpreted as a signed 32-bit
# int — TODO confirm against the GDAL sources). These are only added to the
# forward mapping so such codes can still be read.
GEOMETRY_TYPES.update({
    -2147483647: "Point Z",
    -2147483646: "LineString Z",
    -2147483645: "Polygon Z",
    -2147483644: "MultiPoint Z",
    -2147483643: "MultiLineString Z",
    -2147483642: "MultiPolygon Z",
    -2147483641: "GeometryCollection Z",
})


cdef str get_geometry_type(void *ogr_layer):
    """Get geometry type for layer.

    Measured (M) types are downgraded to their 2D / 3D equivalents with a
    warning, since they are not representable downstream.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer

    Returns
    -------
    str
        geometry type

    Raises
    ------
    DataLayerError
        if the layer definition cannot be obtained
    GeometryError
        if the OGR geometry type code is not in GEOMETRY_TYPES
    """
    cdef void *ogr_featuredef = NULL
    cdef OGRwkbGeometryType ogr_type

    try:
        # check_pointer raises NullPointerError on NULL, CPLE_BaseError on
        # other GDAL-reported failures; both are normalized to DataLayerError
        ogr_featuredef = check_pointer(OGR_L_GetLayerDefn(ogr_layer))
    except NullPointerError:
        raise DataLayerError("Could not get layer definition")

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))

    ogr_type = OGR_FD_GetGeomType(ogr_featuredef)

    if ogr_type not in GEOMETRY_TYPES:
        raise GeometryError(f"Geometry type is not supported: {ogr_type}")

    if OGR_GT_HasM(ogr_type):
        original_type = GEOMETRY_TYPES[ogr_type]

        # Downgrade the type to 2D / 3D: keep the Z flag if present, clear M
        ogr_type = OGR_GT_SetModifier(ogr_type, OGR_GT_HasZ(ogr_type), 0)

        # TODO: review; this might be annoying...
        warnings.warn(
            "Measured (M) geometry types are not supported. "
            f"Original type '{original_type}' "
            f"is converted to '{GEOMETRY_TYPES[ogr_type]}'")

    return GEOMETRY_TYPES[ogr_type]


cdef OGRwkbGeometryType get_geometry_type_code(str geometry_type) except *:
    """Get geometry type code for string geometry type.

    Inverse of :func:`get_geometry_type`; also accepts the "2.5D X" aliases.

    Parameters
    ----------
    geometry_type : str

    Returns
    -------
    int
        geometry type code

    Raises
    ------
    GeometryError
        if the name is not a known geometry type
    """
    if geometry_type not in GEOMETRY_TYPE_CODES:
        raise GeometryError(f"Geometry type is not supported: {geometry_type}")

    return GEOMETRY_TYPE_CODES[geometry_type]
28 | 29 | To ignore this warning: 30 | 31 | ```python 32 | >>> import warnings 33 | >>> warnings.filterwarnings("ignore", message=".*Measured \(M\) geometry types are not supported.*") 34 | ``` 35 | 36 | ## No support for curvilinear, triangle, TIN, and surface geometries 37 | 38 | Pyogrio does not support curvilinear, triangle, TIN, and surface geometries. 39 | These are automatically converted to their linear approximation when reading 40 | geometries from the data layer. 41 | 42 | ## Character encoding 43 | 44 | Pyogrio supports reading / writing data layers with a defined encoding. Where 45 | possible and the `encoding` option is not specified, GDAL will attempt to 46 | automatically decode from the native encoding to `UTF-8`, and pyogrio will report 47 | that the encoding is `UTF-8` in that case instead of the native encoding. For 48 | [ESRI Shapefiles](https://gdal.org/drivers/vector/shapefile.html#encoding), 49 | GDAL will use the associated `.cpg` file or a code page specified in the `.dbf` 50 | file to infer the native encoding, but may incorrectly assume the native encoding 51 | is `ISO-8859-1`, leading to miscoding errors. Most other drivers are assumed to 52 | be in `UTF-8`, but it is possible (in theory) to specify the `encoding` parameter 53 | manually to force conversions to use the specified encoding value. 54 | 55 | Field names and values are read into Python `UTF-8` strings. 56 | 57 | ## No validation of geometry or field types 58 | 59 | Pyogrio does not currently validate attribute values or geometry types before 60 | attempting to write to the output file. Invalid types may crash during writing 61 | with obscure error messages. 62 | 63 | ## Support for OpenStreetMap (OSM) data 64 | 65 | OpenStreetMap data do not natively support calculating the feature count by data 66 | layer due to the internal data structures. 
To get around this, Pyogrio iterates 67 | over all features first to calculate the feature count that is used to allocate 68 | arrays that contain the geometries and attributes read from the data layer, and 69 | then iterates over all feature again to populate those arrays. Further, data 70 | within the file are not structured at the top level to support fast reading by 71 | layer, which means that reading data by layer may need to read all records 72 | within the data source, not just those belonging to a particular layer. This is 73 | inefficient and slow, and is exacerbated when attemping to read from 74 | remotely-hosted data sources rather than local files. 75 | 76 | You may also be instructed by GDAL to enable interleaved reading mode via an 77 | error message when you try to read a large file without it, which you can do in 78 | one of two ways: 79 | 80 | 1. Set config option used for all operations 81 | 82 | ```python 83 | from pyogrio import set_gdal_config_options 84 | 85 | set_gdal_config_options({"OGR_INTERLEAVED_READING": True}) 86 | ``` 87 | 88 | 2. 
"""Benchmarks comparing pyogrio raw read/write against Fiona equivalents.

Fixtures (``naturalearth_*``, ``nhd_hr``) are provided by the benchmarks
conftest; write benchmarks emit into pytest's ``tmp_path``.
"""

import fiona
import pytest

from pyogrio.raw import read, write


def fiona_read(path, layer=None):
    """Read records from OGR data source using Fiona.

    Note: Fiona returns different information than pyogrio and we have to
    use a list here to force reading from Fiona's records generator -
    both of which incur a slight performance penalty.
    """
    with fiona.open(path, layer=layer) as src:
        list(src)


def fiona_write(path, records, **kwargs):
    """Write ``records`` to ``path`` one at a time using Fiona.

    ``kwargs`` (driver, crs, schema, ...) are passed through to
    ``fiona.open``.
    """
    with fiona.open(path, "w", **kwargs) as out:
        for record in records:
            out.write(record)


@pytest.mark.benchmark(group="read-lowres")
def test_read_lowres(naturalearth_lowres, benchmark):
    benchmark(read, naturalearth_lowres)


@pytest.mark.benchmark(group="read-lowres")
def test_read_fiona_lowres(naturalearth_lowres, benchmark):
    benchmark(fiona_read, naturalearth_lowres)


@pytest.mark.benchmark(group="read-modres-admin0")
def test_read_modres(naturalearth_modres, benchmark):
    benchmark(read, naturalearth_modres)


@pytest.mark.benchmark(group="read-modres-admin0")
def test_read_vsi_modres(naturalearth_modres_vsi, benchmark):
    benchmark(read, naturalearth_modres_vsi)


@pytest.mark.benchmark(group="read-modres-admin0")
def test_read_fiona_modres(naturalearth_modres, benchmark):
    benchmark(fiona_read, naturalearth_modres)


@pytest.mark.benchmark(group="read-modres-admin1")
def test_read_modres1(naturalearth_modres1, benchmark):
    benchmark(read, naturalearth_modres1)


@pytest.mark.benchmark(group="read-modres-admin1")
def test_read_fiona_modres1(naturalearth_modres1, benchmark):
    benchmark(fiona_read, naturalearth_modres1)


@pytest.mark.benchmark(group="read-nhd_hr")
def test_read_nhd_hr(nhd_hr, benchmark):
    benchmark(read, nhd_hr, layer="NHDFlowline")


@pytest.mark.benchmark(group="read-nhd_hr")
def test_read_fiona_nhd_hr(nhd_hr, benchmark):
    benchmark(fiona_read, nhd_hr, layer="NHDFlowline")


@pytest.mark.benchmark(group="read-subset")
def test_read_full_modres1(naturalearth_modres1, benchmark):
    benchmark(read, naturalearth_modres1)


@pytest.mark.benchmark(group="read-subset")
def test_read_no_geometry_modres1(naturalearth_modres1, benchmark):
    benchmark(read, naturalearth_modres1, read_geometry=False)


@pytest.mark.benchmark(group="read-subset")
def test_read_one_column_modres1(naturalearth_modres1, benchmark):
    benchmark(read, naturalearth_modres1, columns=["NAME"])


@pytest.mark.benchmark(group="read-subset")
def test_read_only_geometry_modres1(naturalearth_modres1, benchmark):
    benchmark(read, naturalearth_modres1, columns=[])


@pytest.mark.benchmark(group="read-subset")
def test_read_only_meta_modres1(naturalearth_modres1, benchmark):
    benchmark(read, naturalearth_modres1, columns=[], read_geometry=False)


@pytest.mark.benchmark(group="write-lowres")
def test_write_lowres_shp(tmp_path, naturalearth_lowres, benchmark):
    meta, _, geometry, field_data = read(naturalearth_lowres)
    benchmark(write, tmp_path / "test.shp", geometry, field_data, driver="ESRI Shapefile", **meta)


@pytest.mark.benchmark(group="write-lowres")
def test_write_lowres_gpkg(tmp_path, naturalearth_lowres, benchmark):
    meta, _, geometry, field_data = read(naturalearth_lowres)
    benchmark(write, tmp_path / "test.gpkg", geometry, field_data, driver="GPKG", **meta)


@pytest.mark.benchmark(group="write-lowres")
def test_write_lowres_geojson(tmp_path, naturalearth_lowres, benchmark):
    meta, _, geometry, field_data = read(naturalearth_lowres)
    benchmark(write, tmp_path / "test.json", geometry, field_data, driver="GeoJSON", **meta)


@pytest.mark.benchmark(group="write-lowres")
def test_write_lowres_geojsonseq(tmp_path, naturalearth_lowres, benchmark):
    meta, _, geometry, field_data = read(naturalearth_lowres)
    benchmark(write, tmp_path / "test.json", geometry, field_data, driver="GeoJSONSeq", **meta)


@pytest.mark.benchmark(group="write-lowres")
def test_write_fiona_lowres_shp(tmp_path, naturalearth_lowres, benchmark):
    with fiona.open(naturalearth_lowres) as source:
        crs = source.crs
        schema = source.schema
        records = list(source)

    benchmark(
        fiona_write, tmp_path / "test.shp", records, driver="ESRI Shapefile", crs=crs, schema=schema
    )


# @pytest.mark.benchmark(group="write-lowres")
# def test_write_fiona_lowres_gpkg(tmp_path, naturalearth_lowres, benchmark):
#     with fiona.open(naturalearth_lowres) as source:
#         crs = source.crs
#         schema = source.schema
#         records = list(source)

#     benchmark(fiona_write, tmp_path / "test.gpkg", records, driver="GPKG", crs=crs, schema=schema)


# @pytest.mark.benchmark(group="write-lowres")
# def test_write_fiona_lowres_geojson(tmp_path, naturalearth_lowres, benchmark):
#     with fiona.open(naturalearth_lowres) as source:
#         crs = source.crs
#         schema = source.schema
#         records = list(source)

#     benchmark(fiona_write, tmp_path / "test.json", records, driver="GeoJSON", crs=crs, schema=schema)


@pytest.mark.benchmark(group="write-modres")
def test_write_modres_shp(tmp_path, naturalearth_modres, benchmark):
    meta, _, geometry, field_data = read(naturalearth_modres)
    benchmark(write, tmp_path / "test.shp", geometry, field_data, **meta)


@pytest.mark.benchmark(group="write-modres")
def test_write_fiona_modres_shp(tmp_path, naturalearth_modres, benchmark):
    with fiona.open(naturalearth_modres) as source:
        crs = source.crs
        schema = source.schema
        records = list(source)

    benchmark(
        fiona_write, tmp_path / "test.shp", records, driver="ESRI Shapefile", crs=crs, schema=schema
    )
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools", 4 | "Cython>=3.1", 5 | "versioneer[toml]==0.28", 6 | # tomli is used by versioneer 7 | "tomli; python_version < '3.11'", 8 | ] 9 | build-backend = "setuptools.build_meta" 10 | 11 | [project] 12 | name = "pyogrio" 13 | dynamic = ["version"] 14 | authors = [ 15 | { name = "Brendan C. Ward", email = "bcward@astutespruce.com" }, 16 | { name = "pyogrio contributors" }, 17 | ] 18 | maintainers = [{ name = "pyogrio contributors" }] 19 | license = { file = "LICENSE" } 20 | description = "Vectorized spatial vector file format I/O using GDAL/OGR" 21 | readme = "README.md" 22 | classifiers = [ 23 | "Development Status :: 5 - Production/Stable", 24 | "Intended Audience :: Science/Research", 25 | "License :: OSI Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | "Programming Language :: Python :: 3", 28 | "Topic :: Scientific/Engineering :: GIS", 29 | "Programming Language :: Python :: Free Threading :: 2 - Beta", 30 | ] 31 | requires-python = ">=3.10" 32 | dependencies = ["certifi", "numpy", "packaging"] 33 | 34 | [project.optional-dependencies] 35 | dev = ["cython>=3.1"] 36 | test = ["pytest", "pytest-cov"] 37 | benchmark = ["pytest-benchmark"] 38 | geopandas = ["geopandas"] 39 | 40 | [project.urls] 41 | Home = "https://pyogrio.readthedocs.io/" 42 | Repository = "https://github.com/geopandas/pyogrio" 43 | 44 | [tool.cibuildwheel] 45 | skip = ["*musllinux*"] 46 | archs = ["auto64"] 47 | manylinux-x86_64-image = "manylinux-x86_64-vcpkg-gdal:latest" 48 | manylinux-aarch64-image = "manylinux-aarch64-vcpkg-gdal:latest" 49 | build-verbosity = 3 50 | enable = ["cpython-freethreading"] 51 | 52 | [tool.cibuildwheel.linux.environment] 53 | VCPKG_INSTALL = "$VCPKG_INSTALLATION_ROOT/installed/$VCPKG_DEFAULT_TRIPLET" 54 | GDAL_INCLUDE_PATH = "$VCPKG_INSTALL/include" 55 | GDAL_LIBRARY_PATH = "$VCPKG_INSTALL/lib" 56 | GDAL_VERSION = 
"3.11.4" 57 | PYOGRIO_PACKAGE_DATA = 1 58 | GDAL_DATA = "$VCPKG_INSTALL/share/gdal" 59 | PROJ_LIB = "$VCPKG_INSTALL/share/proj" 60 | 61 | [tool.cibuildwheel.macos] 62 | repair-wheel-command = [ 63 | "DYLD_LIBRARY_PATH=$GDAL_LIBRARY_PATH delocate-listdeps {wheel}", 64 | "DYLD_LIBRARY_PATH=$GDAL_LIBRARY_PATH delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}", 65 | ] 66 | 67 | [tool.cibuildwheel.macos.environment] 68 | VCPKG_INSTALL = "$VCPKG_INSTALLATION_ROOT/installed/$VCPKG_DEFAULT_TRIPLET" 69 | GDAL_INCLUDE_PATH = "$VCPKG_INSTALL/include" 70 | GDAL_LIBRARY_PATH = "$VCPKG_INSTALL/lib" 71 | GDAL_VERSION = "3.11.4" 72 | PYOGRIO_PACKAGE_DATA = 1 73 | GDAL_DATA = "$VCPKG_INSTALL/share/gdal" 74 | PROJ_LIB = "$VCPKG_INSTALL/share/proj" 75 | MACOSX_DEPLOYMENT_TARGET = "12.0" 76 | 77 | [tool.cibuildwheel.windows] 78 | before-build = "pip install delvewheel" 79 | repair-wheel-command = "delvewheel repair --add-path C:/vcpkg/installed/x64-windows-dynamic-release/bin -w {dest_dir} {wheel}" 80 | 81 | [tool.cibuildwheel.windows.environment] 82 | VCPKG_INSTALL = "$VCPKG_INSTALLATION_ROOT/installed/x64-windows-dynamic-release" 83 | GDAL_INCLUDE_PATH = "$VCPKG_INSTALL/include" 84 | GDAL_LIBRARY_PATH = "$VCPKG_INSTALL/lib" 85 | GDAL_VERSION = "3.11.4" 86 | PYOGRIO_PACKAGE_DATA = 1 87 | GDAL_DATA = "$VCPKG_INSTALL/share/gdal" 88 | PROJ_LIB = "$VCPKG_INSTALL/share/proj" 89 | 90 | [tool.cython-lint] 91 | ignore = ["E265", "E222"] 92 | 93 | [tool.versioneer] 94 | VCS = "git" 95 | style = "pep440" 96 | versionfile_source = "pyogrio/_version.py" 97 | versionfile_build = "pyogrio/_version.py" 98 | parentdir_prefix = "pyogrio-" 99 | tag_prefix = "v" 100 | 101 | [tool.ruff] 102 | line-length = 88 103 | extend-exclude = ["doc/*", "benchmarks/*", "pyogrio/_version.py", "conf.py", "setup.py"] 104 | 105 | [tool.ruff.lint] 106 | select = [ 107 | # pyflakes 108 | "F", 109 | # pycodestyle 110 | "E", 111 | "W", 112 | # pyupgrade 113 | "UP", 114 | # flake8-bugbear 115 | "B", 116 
| # flake8-debugger 117 | "T10", 118 | # flake8-simplify 119 | # "SIM", 120 | # pylint 121 | "PLC", 122 | "PLE", 123 | "PLR", 124 | "PLW", 125 | # misc lints 126 | "PIE", 127 | # implicit string concatenation 128 | "ISC", 129 | # type-checking imports 130 | "TCH", 131 | # comprehensions 132 | "C4", 133 | # Ruff-specific rules 134 | "RUF", 135 | # isort 136 | "I", 137 | # pydocstyle 138 | "D", 139 | ] 140 | 141 | ignore = [ 142 | ### Intentionally disabled 143 | # module level import not at top of file 144 | "E402", 145 | # do not assign a lambda expression, use a def 146 | "E731", 147 | # mutable-argument-default 148 | "B006", 149 | # unused-loop-control-variable 150 | "B007", 151 | # get-attr-with-constant 152 | "B009", 153 | # Only works with python >=3.10 154 | "B905", 155 | # dict literals 156 | "C408", 157 | # Too many arguments to function call 158 | "PLR0913", 159 | # Too many returns 160 | "PLR0911", 161 | # Too many branches 162 | "PLR0912", 163 | # Too many statements 164 | "PLR0915", 165 | # Magic number 166 | "PLR2004", 167 | # Redefined loop name 168 | "PLW2901", 169 | # Global statements are discouraged 170 | "PLW0603", 171 | # compare-to-empty-string 172 | "PLC1901", 173 | 174 | ### Additional checks that don't pass yet 175 | # Useless statement 176 | "B018", 177 | # Within an except clause, raise exceptions with ... 
178 | "B904", 179 | # Consider `elif` instead of `else` then `if` to remove indentation level 180 | "PLR5501", 181 | # collection-literal-concatenation 182 | "RUF005", 183 | # Mutable class attributes should be annotated with `typing.ClassVar`, 184 | "RUF012", 185 | ] 186 | 187 | [tool.ruff.lint.per-file-ignores] 188 | # ignore pydocstyle errors in tests 189 | "**/tests/*" = ["D"] 190 | 191 | [tool.ruff.lint.isort] 192 | combine-as-imports = true 193 | extra-standard-library = ["packaging"] 194 | 195 | section-order = [ 196 | "future", 197 | "standard-library", 198 | "third-party", 199 | "geo", 200 | "first-party", 201 | "local-folder", 202 | "testing", 203 | ] 204 | 205 | [tool.ruff.lint.isort.sections] 206 | "geo" = ["shapely", "pyproj"] 207 | "testing" = [ 208 | "pytest", 209 | "pandas.testing", 210 | "numpy.testing", 211 | "geopandas.tests", 212 | "geopandas.testing", 213 | ] 214 | 215 | [tool.ruff.lint.pydocstyle] 216 | convention = "numpy" 217 | -------------------------------------------------------------------------------- /benchmarks/test_io_benchmarks_geopandas.py: -------------------------------------------------------------------------------- 1 | """ 2 | NOTE: this requires that all packages use the same version of GEOS. 3 | Install each so that they use the system GEOS. 
4 | After installing geopandas, reinstall shapely via: 5 | `pip install shapely --no-binary shapely` 6 | """ 7 | 8 | import os 9 | 10 | import geopandas as gp 11 | import pytest 12 | 13 | from pyogrio.geopandas import read_dataframe, write_dataframe 14 | 15 | 16 | @pytest.mark.benchmark(group="read-geopandas-lowres-admin0") 17 | def test_read_dataframe_benchmark_lowres(naturalearth_lowres, benchmark): 18 | benchmark(read_dataframe, naturalearth_lowres) 19 | 20 | 21 | @pytest.mark.benchmark(group="read-geopandas-lowres-admin0") 22 | def test_read_dataframe_benchmark_geopandas_lowres(naturalearth_lowres, benchmark): 23 | benchmark(gp.read_file, naturalearth_lowres) 24 | 25 | 26 | @pytest.mark.benchmark(group="read-geopandas-modres-admin0") 27 | def test_read_dataframe_benchmark_modres(naturalearth_modres, benchmark): 28 | benchmark(read_dataframe, naturalearth_modres) 29 | 30 | 31 | @pytest.mark.benchmark(group="read-geopandas-modres-admin0") 32 | def test_read_dataframe_benchmark_vsi_modres(naturalearth_modres_vsi, benchmark): 33 | benchmark(read_dataframe, naturalearth_modres_vsi) 34 | 35 | 36 | @pytest.mark.benchmark(group="read-geopandas-modres-admin0") 37 | def test_read_dataframe_benchmark_geopandas_modres(naturalearth_modres, benchmark): 38 | benchmark(gp.read_file, naturalearth_modres) 39 | 40 | 41 | @pytest.mark.benchmark(group="read-geopandas-modres-admin1") 42 | def test_read_dataframe_benchmark_modres1(naturalearth_modres1, benchmark): 43 | benchmark(read_dataframe, naturalearth_modres1) 44 | 45 | 46 | @pytest.mark.benchmark(group="read-geopandas-modres-admin1") 47 | def test_read_dataframe_benchmark_geopandas_modres1(naturalearth_modres1, benchmark): 48 | benchmark(gp.read_file, naturalearth_modres1) 49 | 50 | 51 | @pytest.mark.benchmark(group="read-geopandas-nhd_hr") 52 | def test_read_dataframe_benchmark_nhd_hr(nhd_hr, benchmark): 53 | benchmark(read_dataframe, nhd_hr, layer="NHDFlowline") 54 | 55 | 56 | 
@pytest.mark.benchmark(group="read-geopandas-nhd_hr") 57 | def test_read_dataframe_benchmark_geopandas_nhd_hr(nhd_hr, benchmark): 58 | benchmark(gp.read_file, nhd_hr, layer="NHDFlowline") 59 | 60 | 61 | ### Write lowres Admin 0 62 | @pytest.mark.benchmark(group="write-geopandas-lowres-admin0") 63 | def test_write_dataframe_benchmark_lowres_shp(tmp_path, naturalearth_lowres, benchmark): 64 | df = read_dataframe(naturalearth_lowres) 65 | benchmark(write_dataframe, df, tmp_path / "test.shp", driver="ESRI Shapefile") 66 | 67 | 68 | @pytest.mark.benchmark(group="write-geopandas-lowres-admin0") 69 | def test_write_dataframe_benchmark_lowres_gpkg(tmp_path, naturalearth_lowres, benchmark): 70 | df = read_dataframe(naturalearth_lowres) 71 | benchmark(write_dataframe, df, tmp_path / "test.gpkg", driver="GPKG") 72 | 73 | 74 | @pytest.mark.benchmark(group="write-geopandas-lowres-admin0") 75 | def test_write_dataframe_benchmark_lowres_geojson( 76 | tmp_path, naturalearth_lowres, benchmark 77 | ): 78 | df = read_dataframe(naturalearth_lowres) 79 | benchmark(write_dataframe, df, tmp_path / "test.json", driver="GeoJSON") 80 | 81 | 82 | @pytest.mark.benchmark(group="write-geopandas-lowres-admin0") 83 | def test_write_dataframe_benchmark_lowres_geojsonseq( 84 | tmp_path, naturalearth_lowres, benchmark 85 | ): 86 | df = read_dataframe(naturalearth_lowres) 87 | benchmark(write_dataframe, df, tmp_path / "test.json", driver="GeoJSONSeq") 88 | 89 | 90 | @pytest.mark.benchmark(group="write-geopandas-lowres-admin0") 91 | def test_write_dataframe_benchmark_geopandas_lowres_shp( 92 | tmp_path, naturalearth_lowres, benchmark 93 | ): 94 | df = gp.read_file(naturalearth_lowres) 95 | benchmark(df.to_file, tmp_path / "test.shp", driver="ESRI Shapefile") 96 | 97 | 98 | @pytest.mark.benchmark(group="write-geopandas-lowres-admin0") 99 | def test_write_dataframe_benchmark_geopandas_lowres_gpkg( 100 | tmp_path, naturalearth_lowres, benchmark 101 | ): 102 | df = gp.read_file(naturalearth_lowres) 103 | 
benchmark(df.to_file, tmp_path / "test.gpkg", driver="GPKG") 104 | 105 | 106 | ### Write modres Admin 0 107 | @pytest.mark.benchmark(group="write-geopandas-modres-admin0") 108 | def test_write_dataframe_benchmark_modres_shp(tmp_path, naturalearth_modres, benchmark): 109 | df = read_dataframe(naturalearth_modres) 110 | benchmark(write_dataframe, df, tmp_path / "test.shp", driver="ESRI Shapefile") 111 | 112 | 113 | @pytest.mark.benchmark(group="write-geopandas-modres-admin0") 114 | def test_write_dataframe_benchmark_modres_gpkg(tmp_path, naturalearth_modres, benchmark): 115 | df = read_dataframe(naturalearth_modres) 116 | benchmark(write_dataframe, df, tmp_path / "test.gpkg", driver="GPKG") 117 | 118 | 119 | @pytest.mark.benchmark(group="write-geopandas-modres-admin0") 120 | def test_write_dataframe_benchmark_modres_geojson( 121 | tmp_path, naturalearth_modres, benchmark 122 | ): 123 | df = read_dataframe(naturalearth_modres) 124 | benchmark(write_dataframe, df, tmp_path / "test.json", driver="GeoJSON") 125 | 126 | 127 | @pytest.mark.benchmark(group="write-geopandas-modres-admin0") 128 | def test_write_dataframe_benchmark_modres_geojsonseq( 129 | tmp_path, naturalearth_modres, benchmark 130 | ): 131 | df = read_dataframe(naturalearth_modres) 132 | benchmark(write_dataframe, df, tmp_path / "test.json", driver="GeoJSONSeq") 133 | 134 | 135 | @pytest.mark.benchmark(group="write-geopandas-modres-admin0") 136 | def test_write_dataframe_benchmark_geopandas_modres_shp( 137 | tmp_path, naturalearth_modres, benchmark 138 | ): 139 | df = gp.read_file(naturalearth_modres) 140 | benchmark(df.to_file, tmp_path / "test.shp", driver="ESRI Shapefile") 141 | 142 | 143 | @pytest.mark.benchmark(group="write-geopandas-modres-admin0") 144 | def test_write_dataframe_benchmark_geopandas_modres_gpkg( 145 | tmp_path, naturalearth_modres, benchmark 146 | ): 147 | df = gp.read_file(naturalearth_modres) 148 | benchmark(df.to_file, tmp_path / "test.gpkg", driver="GPKG") 149 | 150 | 151 | ### 
Write NHD 152 | @pytest.mark.filterwarnings("ignore: RuntimeWarning") 153 | @pytest.mark.benchmark(group="write-geopandas-nhd_hr") 154 | def test_write_dataframe_benchmark_nhd_shp(tmp_path, nhd_hr, benchmark): 155 | layer = "NHDFlowline" 156 | df = read_dataframe(nhd_hr, layer=layer) 157 | 158 | # Datetime not currently supported 159 | df = df.drop(columns="FDate") 160 | 161 | benchmark(write_dataframe, df, tmp_path / "test.shp", layer=layer, driver="ESRI Shapefile") 162 | 163 | 164 | @pytest.mark.filterwarnings("ignore: RuntimeWarning") 165 | @pytest.mark.benchmark(group="write-geopandas-nhd_hr") 166 | def test_write_dataframe_benchmark_geopandas_nhd_shp(tmp_path, nhd_hr, benchmark): 167 | layer = "NHDFlowline" 168 | df = gp.read_file(nhd_hr, layer=layer) 169 | 170 | # Datetime not currently supported by pyogrio, so drop here too so that the 171 | # benchmark is fair. 172 | df = df.drop(columns="FDate") 173 | 174 | benchmark(df.to_file, tmp_path/"test.shp", layer=layer, driver="ESRI Shapefile") 175 | -------------------------------------------------------------------------------- /docs/source/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Requirements 4 | 5 | Supports Python 3.10 - 3.14 and GDAL 3.6.x - 3.11.x 6 | 7 | Reading to GeoDataFrames requires `geopandas>=0.12` with `shapely>=2`. 8 | 9 | Additionally, installing `pyarrow` in combination with GDAL 3.6+ enables 10 | a further speed-up when specifying `use_arrow=True`. 11 | 12 | ## Installation 13 | 14 | ### Conda-forge 15 | 16 | This package is available on [conda-forge](https://anaconda.org/conda-forge/pyogrio) 17 | for Linux, MacOS, and Windows. 18 | 19 | ```bash 20 | conda install -c conda-forge pyogrio 21 | ``` 22 | 23 | This requires compatible versions of `GDAL` and `numpy` from `conda-forge` for 24 | raw I/O support and `geopandas` and their dependencies for GeoDataFrame 25 | I/O support. 
By default, the `GDAL` package on conda-forge already supports a 26 | wide range of vector formats. If needed, you can install additional drivers by 27 | installing the associated 28 | [conda-forge package](https://gdal.org/en/latest/download.html#conda). The 29 | following packages are currently available to install extra vector drivers: 30 | 31 | - `libgdal-arrow-parquet` ((Geo)Parquet and (Geo)Arrow IPC) 32 | - `libgdal-pg` (PostgreSQL / PostGIS) 33 | - `libgdal-xls` (XLS - MS Excel format) 34 | 35 | ### PyPI 36 | 37 | This package is available on [PyPI](https://pypi.org/project/pyogrio/) for Linux, 38 | MacOS, and Windows. 39 | 40 | ```bash 41 | pip install pyogrio 42 | ``` 43 | 44 | This installs binary wheels that include GDAL. 45 | 46 | If you get installation errors about Cython or GDAL not being available, this is 47 | most likely due to the installation process falling back to installing from the 48 | source distribution because the available wheels are not compatible with your 49 | platform. 50 | 51 | The binary wheels available on PyPI include the core GDAL drivers (GeoJSON, 52 | ESRI Shapefile, GPKG, FGB, OpenFileGDB, etc) but do not include more advanced 53 | drivers such as LIBKML and Spatialite. If you need such drivers, we recommend 54 | that you use conda-forge to install pyogrio as explained above. 55 | 56 | ### Troubleshooting installation errors 57 | 58 | If you install GeoPandas or Fiona using `pip`, you may encounter issues related 59 | to incompatibility of the exact GDAL library pre-installed with Fiona and the 60 | version of GDAL that gets compiled with Pyogrio. 61 | 62 | This may show up as an exception like this for a supported driver (e.g., 63 | `ESRI Shapefile`): 64 | 65 | ```Python 66 | pyogrio.errors.DataSourceError: Could not obtain driver ... 
67 | ``` 68 | 69 | To get around it, uninstall `fiona` then reinstall to use system GDAL: 70 | 71 | ```bash 72 | pip uninstall fiona 73 | pip install fiona --no-binary fiona 74 | ``` 75 | 76 | Then restart your interpreter. This ensures that both Pyogrio and Fiona use 77 | exactly the same GDAL library. 78 | 79 | ## Development 80 | 81 | Clone this repository to a local folder. 82 | 83 | Install an appropriate distribution of GDAL for your system. Either `gdal-config` must 84 | be on your system path (to automatically determine the GDAL paths), or the 85 | `GDAL_INCLUDE_PATH`, `GDAL_LIBRARY_PATH`, and `GDAL_VERSION` environment variables need 86 | to be set. Specific instructions on how to install these dependencies on Windows can be 87 | found below. 88 | 89 | Building Pyogrio requires `Cython`, `numpy`, and `pandas`. 90 | 91 | Pyogrio follows the [GeoPandas Style Guide](https://geopandas.org/en/stable/community/contributing.html#style-guide-linting) 92 | and uses `Ruff` to ensure consistent formatting. 93 | 94 | It is recommended to install `pre-commit` and register its hooks so the formatting is 95 | automatically verified when you commit code. 96 | 97 | ``` 98 | pre-commit install 99 | ``` 100 | 101 | Run `python setup.py develop` to build the extensions in Cython. 102 | 103 | Tests are run using `pytest`: 104 | 105 | ```bash 106 | pytest pyogrio/tests 107 | ``` 108 | 109 | ### Windows 110 | 111 | There are different ways to install the necessary dependencies and setup your local 112 | development environment on windows. 113 | 114 | #### vcpkg 115 | 116 | [vcpkg](https://vcpkg.io/en/index.html) is used to build pyogrio from source 117 | as part of creating the Pyogrio Python wheels for Windows. You can install 118 | GDAL and other dependencies using vcpkg, and then build Pyogrio from source. 119 | 120 | See `.github/workflows/release.yml` for details about how vcpkg is used as part 121 | of the wheel-building process.
122 | 123 | We do not yet have instructions on building Pyogrio from source using vcpkg for 124 | local development; please feel free to contribute additional documentation! 125 | 126 | #### OSGeo4W 127 | 128 | You can also install GDAL from an appropriate provider of Windows binaries. We've heard 129 | that the [OSGeo4W](https://trac.osgeo.org/osgeo4w/) works. 130 | 131 | To build on Windows, you need to provide additional environment variables or 132 | command-line parameters because the location of the GDAL binaries and headers 133 | cannot be automatically determined. 134 | 135 | Assuming GDAL 3.8.3 is installed to `c:\GDAL`, you can set the `GDAL_INCLUDE_PATH`, 136 | `GDAL_LIBRARY_PATH` and `GDAL_VERSION` environment variables and build as follows: 137 | 138 | ```bash 139 | set GDAL_INCLUDE_PATH=C:\GDAL\include 140 | set GDAL_LIBRARY_PATH=C:\GDAL\lib 141 | set GDAL_VERSION=3.8.3 142 | python -m pip install --no-deps --force-reinstall --no-use-pep517 -e . -v 143 | ``` 144 | 145 | Alternatively, you can pass those options also as command-line parameters: 146 | 147 | ```bash 148 | python -m pip install --install-option=build_ext --install-option="-IC:\GDAL\include" --install-option="-lgdal_i" --install-option="-LC:\GDAL\lib" --install-option="--gdalversion=3.8.3" --no-deps --force-reinstall --no-use-pep517 -e . -v 149 | ``` 150 | 151 | The location of the GDAL DLLs must be on your system `PATH`. 152 | 153 | `--no-use-pep517` is required in order to pass additional options to the build 154 | backend (see https://github.com/pypa/pip/issues/5771). 155 | 156 | #### Conda 157 | 158 | It is also possible to install the necessary dependencies using conda. 
159 | 160 | After cloning the repository, you can create a conda environment with the necessary 161 | dependencies like this: 162 | 163 | ``` 164 | conda env create -f environment-dev.yml 165 | ``` 166 | 167 | Before being able to build on Windows, you need to set some additional environment 168 | variables because the location of the GDAL binaries and headers cannot be 169 | automatically determined. 170 | 171 | After activating the `pyogrio-dev` environment the `CONDA_PREFIX` environment variable 172 | will be available. Assuming GDAL 3.8.3 is installed, you will be able to set the 173 | necessary environment variables as follows: 174 | 175 | ```bash 176 | set GDAL_INCLUDE_PATH=%CONDA_PREFIX%\Library\include 177 | set GDAL_LIBRARY_PATH=%CONDA_PREFIX%\Library\lib 178 | set GDAL_VERSION=3.8.3 179 | ``` 180 | 181 | Now you should be able to run `python setup.py develop` to build the extensions in 182 | Cython. 183 | 184 | ## GDAL and PROJ data files 185 | 186 | GDAL requires certain files to be present within a GDAL data folder, as well 187 | as a PROJ data folder. These folders are normally detected automatically. 188 | 189 | If you have an unusual installation of GDAL and PROJ, you may need to set 190 | additional environment variables at **runtime** in order for these to be 191 | correctly detected by GDAL: 192 | 193 | - set `GDAL_DATA` to the folder containing the GDAL data files (e.g., contains `header.dxf`) 194 | within the installation of GDAL that is used by Pyogrio.
- set `PROJ_LIB` to the folder containing the PROJ data files (e.g., contains `proj.db`)
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# Build script: locates GDAL (via environment variables or gdal-config),
# cythonizes the extension modules, and optionally bundles GDAL/PROJ data files.
import logging
import os
from pathlib import Path
import platform
import shutil
import subprocess
import sys

from setuptools import Extension, setup, find_packages
import versioneer

# import Cython if available
try:
    from Cython.Build import cythonize
    from Cython.Distutils import build_ext
except ImportError:
    cythonize = None


logger = logging.getLogger(__name__)


MIN_PYTHON_VERSION = (3, 10, 0)
MIN_GDAL_VERSION = (2, 4, 0)


if sys.version_info < MIN_PYTHON_VERSION:
    raise RuntimeError("Python >= 3.10 is required")


def copy_data_tree(datadir, destdir):
    """Copy the directory tree at datadir to destdir, replacing any existing destdir."""
    if os.path.exists(destdir):
        shutil.rmtree(destdir)
    shutil.copytree(datadir, destdir)


# Get GDAL config from gdal-config command
def read_response(cmd):
    """Run cmd (a list of argv strings) and return its decoded, stripped stdout."""
    return subprocess.check_output(cmd).decode("utf").strip()


def get_gdal_config():
    """
    Obtain the paths and version for compiling and linking with the GDAL C-API.

    GDAL_INCLUDE_PATH, GDAL_LIBRARY_PATH, and GDAL_VERSION environment variables
    are used if all are present.

    If those variables are not present, gdal-config is called (it should be
    on the PATH variable). gdal-config provides all the paths and version.

    If no environment variables were specified or gdal-config was not found,
    no additional paths are provided to the extension. It is still possible
    to compile in this case using custom arguments to setup.py.
    """
    include_dir = os.environ.get("GDAL_INCLUDE_PATH")
    library_dir = os.environ.get("GDAL_LIBRARY_PATH")
    gdal_version_str = os.environ.get("GDAL_VERSION")

    # All three environment variables must be set to use them (see warning below).
    if include_dir and library_dir and gdal_version_str:
        gdal_libs = ["gdal"]

        if platform.system() == "Windows":
            # NOTE: if libgdal is built for Windows using CMake, it is now "gdal",
            # but older Windows builds still use "gdal_i"
            if (Path(library_dir) / "gdal_i.lib").exists():
                gdal_libs = ["gdal_i"]

        return {
            "include_dirs": [include_dir],
            "library_dirs": [library_dir],
            "libraries": gdal_libs,
        }, gdal_version_str

    if include_dir or library_dir or gdal_version_str:
        logger.warning(
            "If specifying the GDAL_INCLUDE_PATH, GDAL_LIBRARY_PATH, or GDAL_VERSION "
            "environment variables, you need to specify all of them."
        )

    try:
        # Get libraries, etc from gdal-config (not available on Windows)
        flags = ["cflags", "libs", "version"]
        gdal_config = os.environ.get("GDAL_CONFIG", "gdal-config")
        config = {flag: read_response([gdal_config, f"--{flag}"]) for flag in flags}

        gdal_version_str = config["version"]
        include_dirs = [entry[2:] for entry in config["cflags"].split(" ")]
        library_dirs = []
        libraries = []
        extra_link_args = []

        # Split gdal-config --libs output into -L (dirs), -l (libs) and the rest.
        for entry in config["libs"].split(" "):
            if entry.startswith("-L"):
                library_dirs.append(entry[2:])
            elif entry.startswith("-l"):
                libraries.append(entry[2:])
            else:
                extra_link_args.append(entry)

        return {
            "include_dirs": include_dirs,
            "library_dirs": library_dirs,
            "libraries": libraries,
            "extra_link_args": extra_link_args,
        }, gdal_version_str

    except Exception as e:
        if platform.system() == "Windows":
            # Get GDAL API version from the command line if specified there.
            if "--gdalversion" in sys.argv:
                index = sys.argv.index("--gdalversion")
                sys.argv.pop(index)
                gdal_version_str = sys.argv.pop(index)
            else:
                print(
                    "GDAL_VERSION must be provided as an environment variable "
                    "or as --gdalversion command line argument"
                )
                sys.exit(1)

            logger.info(
                "Building on Windows requires extra options to setup.py to locate "
                "GDAL files. See the installation documentation."
            )
            return {}, gdal_version_str

        else:
            raise e


ext_modules = []
package_data = {}

# setuptools clean does not cleanup Cython artifacts
if "clean" in sys.argv:
    for directory in ["build", "pyogrio/gdal_data", "pyogrio/proj_data"]:
        if os.path.exists(directory):
            shutil.rmtree(directory)

    root = Path(".")
    for ext in ["*.so", "*.pyc", "*.c", "*.cpp"]:
        for entry in root.rglob(ext):
            entry.unlink()

elif "sdist" in sys.argv or "egg_info" in sys.argv:
    # don't cythonize for the sdist
    pass

else:
    if cythonize is None:
        raise ImportError("Cython is required to build from source")

    ext_options, gdal_version_str = get_gdal_config()

    # strip a dev suffix (e.g. "3.8.0dev") before parsing the version tuple
    gdal_version = tuple(int(i) for i in gdal_version_str.strip("dev").split("."))
    if not gdal_version >= MIN_GDAL_VERSION:
        sys.exit(f"GDAL must be >= {'.'.join(map(str, MIN_GDAL_VERSION))}")

    # exposed to the .pyx sources as a Cython compile-time constant
    compile_time_env = {
        "CTE_GDAL_VERSION": gdal_version,
    }

    ext_modules = cythonize(
        [
            Extension("pyogrio._err", ["pyogrio/_err.pyx"], **ext_options),
            Extension("pyogrio._geometry", ["pyogrio/_geometry.pyx"], **ext_options),
            Extension("pyogrio._io", ["pyogrio/_io.pyx"], **ext_options),
            Extension("pyogrio._ogr", ["pyogrio/_ogr.pyx"], **ext_options),
            Extension("pyogrio._vsi", ["pyogrio/_vsi.pyx"], **ext_options),
        ],
        compiler_directives={"language_level": "3", "freethreading_compatible": True},
        compile_time_env=compile_time_env,
    )

    # Optionally bundle GDAL/PROJ data files into the package (used for wheels);
    # both GDAL_DATA and PROJ_LIB must point at existing directories.
    if os.environ.get("PYOGRIO_PACKAGE_DATA"):
        gdal_data = os.environ.get("GDAL_DATA")
        if gdal_data and os.path.exists(gdal_data):
            logger.info(f"Copying gdal data from {gdal_data}")
            copy_data_tree(gdal_data, "pyogrio/gdal_data")
        else:
            raise Exception(
                "Could not find GDAL data files for packaging. "
                "Make sure to set the GDAL_DATA environment variable"
            )

        proj_data = os.environ.get("PROJ_LIB")
        if proj_data and os.path.exists(proj_data):
            logger.info(f"Copying proj data from {proj_data}")
            copy_data_tree(proj_data, "pyogrio/proj_data")
        else:
            raise Exception(
                "Could not find PROJ data files for packaging. "
                "Make sure to set the PROJ_LIB environment variable"
            )

        package_data = {"pyogrio": ["gdal_data/*", "proj_data/*"]}


version = versioneer.get_version()
cmdclass = versioneer.get_cmdclass()
cmdclass["build_ext"] = build_ext

setup(
    version=version,
    packages=find_packages(),
    include_package_data=True,
    exclude_package_data={"": ["*.h", "_*.pxd", "_*.pyx"]},
    cmdclass=cmdclass,
    ext_modules=ext_modules,
    package_data=package_data,
)
--------------------------------------------------------------------------------
/pyogrio/util.py:
--------------------------------------------------------------------------------
"""Utility functions."""

import re
import sys
from packaging.version import Version
from pathlib import Path
from urllib.parse import urlparse

from pyogrio._ogr import MULTI_EXTENSIONS
from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel


def get_vsi_path_or_buffer(path_or_buffer):
    """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer.

    If path_or_buffer is a bytes object, it will be returned directly and will
    be read into an in-memory dataset when passed to one of the Cython functions.

    If path_or_buffer is a file-like object with a read method, bytes will be
    read from the file-like object and returned.

    Otherwise, it will be converted to a string, and parsed to prefix with
    appropriate GDAL /vsi*/ prefixes.

    Parameters
    ----------
    path_or_buffer : str, pathlib.Path, bytes, or file-like
        A dataset path or URI, raw buffer, or file-like object with a read method.

    Returns
    -------
    str or bytes

    """
    # treat Path objects here already to ignore their read method + to avoid backslashes
    # on Windows.
    if isinstance(path_or_buffer, Path):
        return vsi_path(path_or_buffer)

    if isinstance(path_or_buffer, bytes):
        return path_or_buffer

    if hasattr(path_or_buffer, "read"):
        # file-like object: read the whole buffer into memory
        bytes_buffer = path_or_buffer.read()

        # rewind buffer if possible so that subsequent operations do not need to rewind
        if hasattr(path_or_buffer, "seekable") and path_or_buffer.seekable():
            path_or_buffer.seek(0)

        return bytes_buffer

    return vsi_path(str(path_or_buffer))


def vsi_path(path: str | Path) -> str:
    """Ensure path is a local path or a GDAL-compatible VSI path."""
    # Convert Path objects to string, but for VSI paths, keep posix style path.
    if isinstance(path, Path):
        if sys.platform == "win32" and path.as_posix().startswith("/vsi"):
            path = path.as_posix()
        else:
            path = str(path)

    # path is already in GDAL format
    if path.startswith("/vsi"):
        return path

    # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
    # URL schemes
    if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
        # If it is not a zip file or it is multi-extension zip file that is directly
        # supported by a GDAL driver, return the path as is.
        # NOTE: the path before any "!" separator is the (possible) archive path.
        if not path.split("!")[0].endswith(".zip"):
            return path
        if path.split("!")[0].endswith(MULTI_EXTENSIONS):
            return path

        # prefix then allow to proceed with remaining parsing
        path = f"zip://{path}"

    path, archive, scheme = _parse_uri(path)

    if (
        scheme
        or archive
        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
    ):
        return _construct_vsi_path(path, archive, scheme)

    return path


# Supported URI schemes and their mapping to GDAL's VSI suffix.
SCHEMES = {
    "file": "file",
    "zip": "zip",
    "tar": "tar",
    "gzip": "gzip",
    "http": "curl",
    "https": "curl",
    "ftp": "curl",
    "s3": "s3",
    "gs": "gs",
    "az": "az",
    "adls": "adls",
    "adl": "adls",  # fsspec uses this
    "hdfs": "hdfs",
    "webhdfs": "webhdfs",
    # GDAL additionally supports oss and swift for remote filesystems, but
    # those are for now not added as supported URI
}

# Schemes that map to GDAL's /vsicurl/ handler (http, https, ftp).
CURLSCHEMES = {k for k, v in SCHEMES.items() if v == "curl"}


def _parse_uri(path: str):
    """Parse a URI.

    Returns a tuple of (path, archive, scheme)

    path : str
        Parsed path. Includes the hostname and query string in the case
        of a URI.
    archive : str
        Parsed archive path.
    scheme : str
        URI scheme such as "https" or "zip+s3".
    """
    parts = urlparse(path, allow_fragments=False)

    # if the scheme is not one of GDAL's supported schemes, return raw path
    if parts.scheme and not all(p in SCHEMES for p in parts.scheme.split("+")):
        return path, "", ""

    # we have a URI
    path = parts.path
    scheme = parts.scheme or ""

    if parts.query:
        path += "?" + parts.query

    if parts.scheme and parts.netloc:
        path = parts.netloc + path

    # "!" separates an archive path from the path inside the archive;
    # the last component is the inner path, the one before it the archive.
    parts = path.split("!")
    path = parts.pop() if parts else ""
    archive = parts.pop() if parts else ""
    return (path, archive, scheme)


def _construct_vsi_path(path, archive, scheme) -> str:
    """Convert a parsed path to a GDAL VSI path."""
    prefix = ""
    suffix = ""
    schemes = scheme.split("+")

    # add an implicit "zip" scheme for .zip archives/paths not already marked
    # as zip (multi-extension zips are handled directly by GDAL drivers)
    if "zip" not in schemes and (
        archive.endswith(".zip")
        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
    ):
        schemes.insert(0, "zip")

    if schemes:
        prefix = "/".join(f"vsi{SCHEMES[p]}" for p in schemes if p and p != "file")

        if schemes[-1] in CURLSCHEMES:
            suffix = f"{schemes[-1]}://"

    if prefix:
        if archive:
            return "/{}/{}{}/{}".format(prefix, suffix, archive, path.lstrip("/"))
        else:
            return f"/{prefix}/{suffix}{path}"

    return path


def _preprocess_options_key_value(options):
    """Preprocess options.

    For example, `spatial_index=True` gets converted to `SPATIAL_INDEX="YES"`.
182 | """ 183 | if not isinstance(options, dict): 184 | raise TypeError(f"Expected options to be a dict, got {type(options)}") 185 | 186 | result = {} 187 | for k, v in options.items(): 188 | if v is None: 189 | continue 190 | k = k.upper() 191 | if isinstance(v, bool): 192 | v = "ON" if v else "OFF" 193 | else: 194 | v = str(v) 195 | result[k] = v 196 | return result 197 | 198 | 199 | def _mask_to_wkb(mask): 200 | """Convert a Shapely mask geometry to WKB. 201 | 202 | Parameters 203 | ---------- 204 | mask : Shapely geometry 205 | The geometry to convert to WKB. 206 | 207 | Returns 208 | ------- 209 | WKB bytes or None 210 | 211 | Raises 212 | ------ 213 | ValueError 214 | raised if Shapely >= 2.0 is not available or mask is not a Shapely 215 | Geometry object 216 | 217 | """ 218 | if mask is None: 219 | return mask 220 | 221 | try: 222 | import shapely 223 | 224 | if Version(shapely.__version__) < Version("2.0.0"): 225 | shapely = None 226 | except ImportError: 227 | shapely = None 228 | 229 | if not shapely: 230 | raise ValueError("'mask' parameter requires Shapely >= 2.0") 231 | 232 | if not isinstance(mask, shapely.Geometry): 233 | raise ValueError("'mask' parameter must be a Shapely geometry") 234 | 235 | return shapely.to_wkb(mask) 236 | 237 | 238 | def vsimem_rmtree_toplevel(path: str | Path): 239 | """Remove the parent directory of the file path recursively. 240 | 241 | This is used for final cleanup of an in-memory dataset, which may have been 242 | created within a directory to contain sibling files. 243 | 244 | Additional VSI handlers may be chained to the left of /vsimem/ in path and 245 | will be ignored. 246 | 247 | Remark: function is defined here to be able to run tests on it. 

    Parameters
    ----------
    path : str or pathlib.Path
        path to in-memory file

    """
    if isinstance(path, Path):
        # VSI paths are always posix style
        path = path.as_posix()

    _vsimem_rmtree_toplevel(path)
--------------------------------------------------------------------------------
/pyogrio/_vsi.pyx:
--------------------------------------------------------------------------------
import fnmatch
from io import BytesIO
from uuid import uuid4

from pyogrio._ogr cimport *
from pyogrio._ogr import _get_driver_metadata_item


cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver):
    """Return the path to write to and whether it is a tmp vsimem filepath.

    If passed a io.BytesIO object to write to, a temporary vsimem file will be
    used to write the data directly to memory.
    Hence, a tuple will be returned with a /vsimem/ path and True to indicate
    the path will be to a tmp vsimem file.
    The path will have an extension inferred from the driver if possible. Path
    will be contained in an in-memory directory to contain sibling files
    (though drivers that create sibling files are not supported for in-memory
    files).

    Caller is responsible for deleting the directory via
    vsimem_rmtree_toplevel().

    Parameters
    ----------
    path_or_fp : str or io.BytesIO object
    driver : str

    Returns
    -------
    tuple of (path, use_tmp_vsimem)
        Tuple of the path to write to and a bool indicating if the path is a
        temporary vsimem filepath.
34 | 35 | """ 36 | # The write path is not a BytesIO object, so return path as-is 37 | if not isinstance(path_or_fp, BytesIO): 38 | return (path_or_fp, False) 39 | 40 | # Check for existing bytes 41 | if path_or_fp.getbuffer().nbytes > 0: 42 | raise NotImplementedError( 43 | "writing to existing in-memory object is not supported" 44 | ) 45 | 46 | # Create in-memory directory to contain auxiliary files. 47 | # Prefix with "pyogrio_" so it is clear the directory was created by pyogrio. 48 | memfilename = f"pyogrio_{uuid4().hex}" 49 | VSIMkdir(f"/vsimem/{memfilename}".encode("UTF-8"), 0666) 50 | 51 | # file extension is required for some drivers, set it based on driver metadata 52 | ext = "" 53 | recommended_ext = _get_driver_metadata_item(driver, "DMD_EXTENSIONS") 54 | if recommended_ext is not None: 55 | ext = "." + recommended_ext.split(" ")[0] 56 | 57 | path = f"/vsimem/{memfilename}/{memfilename}{ext}" 58 | 59 | return (path, True) 60 | 61 | 62 | cdef str read_buffer_to_vsimem(bytes bytes_buffer): 63 | """ Wrap the bytes (zero-copy) into an in-memory dataset 64 | 65 | If the first 4 bytes indicate the bytes are a zip file, the returned path 66 | will be prefixed with /vsizip/ and suffixed with .zip to enable proper 67 | reading by GDAL. 68 | 69 | Caller is responsible for deleting the in-memory file via 70 | vsimem_rmtree_toplevel(). 71 | 72 | Parameters 73 | ---------- 74 | bytes_buffer : bytes 75 | """ 76 | cdef int num_bytes = len(bytes_buffer) 77 | 78 | is_zipped = len(bytes_buffer) > 4 and bytes_buffer[:4].startswith(b"PK\x03\x04") 79 | ext = ".zip" if is_zipped else "" 80 | 81 | # Prefix with "pyogrio_" so it is clear the file was created by pyogrio. 
82 | path = f"/vsimem/pyogrio_{uuid4().hex}{ext}" 83 | 84 | # Create an in-memory object that references bytes_buffer 85 | # NOTE: GDAL does not copy the contents of bytes_buffer; it must remain 86 | # in scope through the duration of using this file 87 | vsi_handle = VSIFileFromMemBuffer( 88 | path.encode("UTF-8"), bytes_buffer, num_bytes, 0 89 | ) 90 | 91 | if vsi_handle == NULL: 92 | raise OSError("failed to read buffer into in-memory file") 93 | 94 | if VSIFCloseL(vsi_handle) != 0: 95 | raise OSError("failed to close in-memory file") 96 | 97 | if is_zipped: 98 | path = f"/vsizip/{path}" 99 | 100 | return path 101 | 102 | 103 | cdef read_vsimem_to_buffer(str path, object out_buffer): 104 | """Copy bytes from in-memory file to buffer 105 | 106 | This will automatically unlink the in-memory file pointed to by path; caller 107 | is still responsible for calling vsimem_rmtree_toplevel() to cleanup any 108 | other files contained in the in-memory directory. 109 | 110 | Parameters: 111 | ----------- 112 | path : str 113 | path to in-memory file 114 | buffer : BytesIO object 115 | """ 116 | 117 | cdef unsigned char *vsi_buffer = NULL 118 | cdef vsi_l_offset vsi_buffer_size = 0 119 | 120 | try: 121 | # Take ownership of the buffer to avoid a copy; GDAL will automatically 122 | # unlink the memory file 123 | vsi_buffer = VSIGetMemFileBuffer(path.encode("UTF-8"), &vsi_buffer_size, 1) 124 | if vsi_buffer == NULL: 125 | raise RuntimeError("could not read bytes from in-memory file") 126 | 127 | # write bytes to buffer 128 | out_buffer.write(vsi_buffer[:vsi_buffer_size]) 129 | # rewind to beginning to allow caller to read 130 | out_buffer.seek(0) 131 | 132 | finally: 133 | if vsi_buffer != NULL: 134 | CPLFree(vsi_buffer) 135 | 136 | 137 | cpdef vsimem_rmtree_toplevel(str path): 138 | """Remove the top-level file or top-level directory containing the file. 139 | 140 | This is used for final cleanup of an in-memory dataset. 
The path can point
    to either:
    - a top-level file (directly in /vsimem/).
    - a file in a directory, which may include sibling files.
    - a zip file (reported as a directory by VSI_ISDIR).

    Except for the first case, the top-level directory (direct subdirectory of
    /vsimem/) will be determined and will be removed recursively.

    Additional VSI handlers may be chained to the left of /vsimem/ in path and
    will be ignored.

    Even though it is only meant for "internal use", the function is declared
    as cpdef, so it can be called from tests as well.

    Parameters:
    -----------
    path : str
        path to in-memory file

    """
    cdef VSIStatBufL st_buf

    if "/vsimem/" not in path:
        raise ValueError(f"Path is not a /vsimem/ path: '{path}'")

    # Determine the top-level directory of the file
    mempath_parts = path.split("/vsimem/")[1].split("/")
    if len(mempath_parts) == 0:
        raise OSError("path to in-memory file or directory is required")

    toplevel_path = f"/vsimem/{mempath_parts[0]}"

    if not VSIStatL(toplevel_path.encode("UTF-8"), &st_buf) == 0:
        raise FileNotFoundError(f"Path does not exist: '{path}'")

    # directories (including zip files) are removed recursively;
    # plain top-level files are unlinked
    if VSI_ISDIR(st_buf.st_mode):
        errcode = VSIRmdirRecursive(toplevel_path.encode("UTF-8"))
    else:
        errcode = VSIUnlink(toplevel_path.encode("UTF-8"))

    if errcode != 0:
        raise OSError(f"Error removing '{path}': {errcode=}")


def ogr_vsi_listtree(str path, str pattern):
    """Recursively list the contents in a VSI directory.

    An fnmatch pattern can be specified to filter the directories/files
    returned.

    Parameters:
    -----------
    path : str
        Path to the VSI directory to be listed.
    pattern : str
        Pattern to filter results, in fnmatch format.
197 | 198 | """ 199 | cdef const char *path_c 200 | cdef int n 201 | cdef char** papszFiles 202 | cdef VSIStatBufL st_buf 203 | 204 | path_b = path.encode("UTF-8") 205 | path_c = path_b 206 | 207 | if not VSIStatL(path_c, &st_buf) == 0: 208 | raise FileNotFoundError(f"Path does not exist: '{path}'") 209 | if not VSI_ISDIR(st_buf.st_mode): 210 | raise NotADirectoryError(f"Path is not a directory: '{path}'") 211 | 212 | try: 213 | papszFiles = VSIReadDirRecursive(path_c) 214 | n = CSLCount(papszFiles) 215 | files = [] 216 | for i in range(n): 217 | files.append(papszFiles[i].decode("UTF-8")) 218 | finally: 219 | CSLDestroy(papszFiles) 220 | 221 | # Apply filter pattern 222 | if pattern is not None: 223 | files = fnmatch.filter(files, pattern) 224 | 225 | # Prepend files with the base path 226 | if not path.endswith("/"): 227 | path = f"{path}/" 228 | files = [f"{path}{file}" for file in files] 229 | 230 | return files 231 | 232 | 233 | def ogr_vsi_rmtree(str path): 234 | """Recursively remove VSI directory. 235 | 236 | Parameters: 237 | ----------- 238 | path : str 239 | path to the VSI directory to be removed. 240 | 241 | """ 242 | cdef const char *path_c 243 | cdef VSIStatBufL st_buf 244 | 245 | try: 246 | path_b = path.encode("UTF-8") 247 | except UnicodeDecodeError: 248 | path_b = path 249 | path_c = path_b 250 | if not VSIStatL(path_c, &st_buf) == 0: 251 | raise FileNotFoundError(f"Path does not exist: '{path}'") 252 | if not VSI_ISDIR(st_buf.st_mode): 253 | raise NotADirectoryError(f"Path is not a directory: '{path}'") 254 | if path.endswith("/vsimem") or path.endswith("/vsimem/"): 255 | raise OSError("path to in-memory file or directory is required") 256 | 257 | errcode = VSIRmdirRecursive(path_c) 258 | if errcode != 0: 259 | raise OSError(f"Error in rmtree of '{path}': {errcode=}") 260 | 261 | 262 | def ogr_vsi_unlink(str path): 263 | """Remove VSI file. 264 | 265 | Parameters: 266 | ----------- 267 | path : str 268 | path to the VSI file to be removed. 
269 | 270 | """ 271 | cdef const char *path_c 272 | cdef VSIStatBufL st_buf 273 | 274 | try: 275 | path_b = path.encode("UTF-8") 276 | except UnicodeDecodeError: 277 | path_b = path 278 | path_c = path_b 279 | 280 | if not VSIStatL(path_c, &st_buf) == 0: 281 | raise FileNotFoundError(f"Path does not exist: '{path}'") 282 | 283 | if VSI_ISDIR(st_buf.st_mode): 284 | raise IsADirectoryError(f"Path is a directory: '{path}'") 285 | 286 | errcode = VSIUnlink(path_c) 287 | if errcode != 0: 288 | raise OSError(f"Error removing '{path}': {errcode=}") 289 | -------------------------------------------------------------------------------- /pyogrio/_ogr.pyx: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import warnings 4 | 5 | from pyogrio._err cimport check_pointer 6 | from pyogrio._err import CPLE_BaseError, NullPointerError 7 | from pyogrio.errors import DataSourceError 8 | 9 | MULTI_EXTENSIONS = (".gpkg.zip", ".shp.zip") 10 | 11 | 12 | cdef get_string(const char *c_str, str encoding="UTF-8"): 13 | """Get Python string from a char *. 14 | 15 | IMPORTANT: the char * must still be freed by the caller. 

    Parameters
    ----------
    c_str : char *
    encoding : str, optional (default: UTF-8)

    Returns
    -------
    Python string
    """
    cdef bytes py_str

    py_str = c_str
    return py_str.decode(encoding)


def get_gdal_version():
    """Convert GDAL version number into tuple of (major, minor, revision)"""
    # VERSION_NUM is a packed integer, e.g. 3090200 -> (3, 9, 2)
    version = int(GDALVersionInfo("VERSION_NUM"))
    major = version // 1000000
    minor = (version - (major * 1000000)) // 10000
    revision = (version - (major * 1000000) - (minor * 10000)) // 100
    return (major, minor, revision)


def get_gdal_version_string():
    """Return the GDAL release name (e.g. "3.9.2") as a string."""
    cdef const char* version = GDALVersionInfo("RELEASE_NAME")
    return get_string(version)


cdef extern from "ogr_api.h":
    bint OGRGetGEOSVersion(int *pnMajor, int *pnMinor, int *pnPatch)


def get_gdal_geos_version():
    """Return GEOS version used by GDAL, or None if built without GEOS."""
    cdef int major, minor, revision

    if not OGRGetGEOSVersion(&major, &minor, &revision):
        return None
    return (major, minor, revision)


def set_gdal_config_options(dict options):
    """Set GDAL config options from a dict of name -> value."""
    for name, value in options.items():
        name_b = name.encode("utf-8")
        name_c = name_b

        # None is a special case; this is used to clear the previous value
        if value is None:
            CPLSetConfigOption(name_c, NULL)
            continue

        # normalize bool to ON/OFF
        if isinstance(value, bool):
            value_b = b"ON" if value else b"OFF"
        else:
            value_b = str(value).encode("utf-8")

        value_c = value_b
        CPLSetConfigOption(name_c, value_c)


def get_gdal_config_option(str name):
    """Get a GDAL config option, coercing digit strings to int and ON/OFF to bool."""
    name_b = name.encode("utf-8")
    name_c = name_b
    value = CPLGetConfigOption(name_c, NULL)

    if not value:
        return None

    if value.isdigit():
        return int(value)

    if value == b"ON":
        return True
    if value == b"OFF":
        return False

    str_value = get_string(value)

    return str_value


def ogr_driver_supports_write(driver):
    # check metadata for driver to see if it supports write
    if _get_driver_metadata_item(driver, "DCAP_CREATE") == "YES":
        return True

    return False


def ogr_driver_supports_vsi(driver):
    # check metadata for driver to see if it supports virtual I/O (VSI)
    if _get_driver_metadata_item(driver, "DCAP_VIRTUALIO") == "YES":
        return True

    return False


def ogr_list_drivers():
    """Return dict of driver name -> "rw" or "r" for all registered drivers."""
    cdef OGRSFDriverH driver = NULL
    cdef int i
    cdef char *name_c

    drivers = dict()
    for i in range(OGRGetDriverCount()):
        driver = OGRGetDriver(i)
        name_c = OGR_Dr_GetName(driver)

        name = get_string(name_c)

        if ogr_driver_supports_write(name):
            drivers[name] = "rw"

        else:
            drivers[name] = "r"

    return drivers


cdef void set_proj_search_path(str path):
    """Set PROJ library data file search path for use in GDAL."""
    cdef char **paths = NULL
    cdef const char *path_c = NULL
    path_b = path.encode("utf-8")
    path_c = path_b
    paths = CSLAddString(paths, path_c)
    OSRSetPROJSearchPaths(paths)


def has_gdal_data():
    """Verify that GDAL library data files are correctly found.

    Adapted from Fiona (_env.pyx).
    """
    # header.dxf is a data file known to ship with GDAL
    if CPLFindFile("gdal", "header.dxf") != NULL:
        return True

    return False


def get_gdal_data_path():
    """
    Get the path to the directory GDAL uses to read data files.
    """
    cdef const char *path_c = CPLFindFile("gdal", "header.dxf")
    if path_c != NULL:
        return get_string(path_c).replace("header.dxf", "")
    return None


def has_proj_data():
    """Verify that PROJ library data files are correctly found.

    Returns
    -------
    bool
        True if a test spatial reference object could be created, which verifies
        that data files are correctly loaded.

    Adapted from Fiona (_env.pyx).
    """
    cdef OGRSpatialReferenceH srs = OSRNewSpatialReference(NULL)

    retval = OSRImportFromEPSG(srs, 4326)
    if srs != NULL:
        OSRRelease(srs)

    if retval == OGRERR_NONE:
        # Successful return, so PROJ data files are correctly found
        return True
    else:
        return False


def init_gdal_data():
    """Set GDAL data search directories.

    They are set in the following precedence:
    - wheel copy of gdal_data
    - default detection by GDAL, including GDAL_DATA (detected automatically by GDAL)
    - other well-known paths under sys.prefix

    Adapted from Fiona (env.py, _env.pyx).
    """

    # wheels are packaged to include GDAL data files at pyogrio/gdal_data
    wheel_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "gdal_data"))
    if os.path.exists(wheel_path):
        set_gdal_config_options({"GDAL_DATA": wheel_path})
        if not has_gdal_data():
            raise ValueError(
                "Could not correctly detect GDAL data files installed by pyogrio wheel"
            )
        return

    # GDAL correctly found data files from GDAL_DATA or compiled-in paths
    if has_gdal_data():
        return

    wk_path = os.path.join(sys.prefix, "share", "gdal")
    if os.path.exists(wk_path):
        set_gdal_config_options({"GDAL_DATA": wk_path})
        if not has_gdal_data():
            raise ValueError(
                f"Found GDAL data directory at {wk_path} but it does not appear to "
                "correctly contain GDAL data files"
            )
        return

    warnings.warn(
        "Could not detect GDAL data files. Set GDAL_DATA environment variable to the "
        "correct path.",
        RuntimeWarning
    )


def init_proj_data():
    """Set Proj search directories in the following precedence:
    - wheel copy of proj_data
    - default detection by PROJ, including PROJ_LIB (detected automatically by PROJ)
    - search other well-known paths under sys.prefix

    Adapted from Fiona (env.py, _env.pyx).
    """

    # wheels are packaged to include PROJ data files at pyogrio/proj_data
    wheel_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "proj_data"))
    if os.path.exists(wheel_path):
        set_proj_search_path(wheel_path)
        # verify that this now resolves
        if not has_proj_data():
            raise ValueError(
                "Could not correctly detect PROJ data files installed by pyogrio wheel"
            )
        return

    # PROJ correctly found data files from PROJ_LIB or compiled-in paths
    if has_proj_data():
        return

    wk_path = os.path.join(sys.prefix, "share", "proj")
    if os.path.exists(wk_path):
        set_proj_search_path(wk_path)
        # verify that this now resolves
        if not has_proj_data():
            raise ValueError(
                f"Found PROJ data directory at {wk_path} but it does not appear to "
                "correctly contain PROJ data files"
            )
        return

    warnings.warn(
        "Could not detect PROJ data files. Set PROJ_LIB environment variable to "
        "the correct path.", RuntimeWarning)


def _register_drivers():
    # Register all drivers (safe to call repeatedly)
    GDALAllRegister()


def _get_driver_metadata_item(driver, metadata_item):
    """
    Query driver metadata items.
282 | 283 | Parameters 284 | ---------- 285 | driver : str 286 | Driver to query 287 | metadata_item : str 288 | Metadata item to query 289 | 290 | Returns 291 | ------- 292 | str or None 293 | Metadata item 294 | """ 295 | cdef const char* metadata_c = NULL 296 | cdef void *cogr_driver = NULL 297 | 298 | try: 299 | cogr_driver = check_pointer(GDALGetDriverByName(driver.encode("UTF-8"))) 300 | except NullPointerError: 301 | raise DataSourceError( 302 | f"Could not obtain driver: {driver} (check that it was installed " 303 | "correctly into GDAL)" 304 | ) 305 | except CPLE_BaseError as exc: 306 | raise DataSourceError(str(exc)) 307 | 308 | metadata_c = GDALGetMetadataItem(cogr_driver, metadata_item.encode("UTF-8"), NULL) 309 | 310 | metadata = None 311 | if metadata_c != NULL: 312 | metadata = metadata_c 313 | metadata = metadata.decode("UTF-8") 314 | if len(metadata) == 0: 315 | metadata = None 316 | 317 | return metadata 318 | 319 | 320 | def _get_drivers_for_path(path): 321 | cdef OGRSFDriverH driver = NULL 322 | cdef int i 323 | cdef char *name_c 324 | 325 | path = str(path).lower() 326 | 327 | parts = os.path.splitext(path) 328 | if len(parts) == 2 and len(parts[1]) > 1: 329 | ext = parts[1][1:] 330 | else: 331 | ext = None 332 | 333 | # allow specific drivers to have a .zip extension to match GDAL behavior 334 | if ext == "zip": 335 | for multi_ext in MULTI_EXTENSIONS: 336 | if path.endswith(multi_ext): 337 | ext = multi_ext[1:] # strip leading dot 338 | break 339 | 340 | drivers = [] 341 | for i in range(OGRGetDriverCount()): 342 | driver = OGRGetDriver(i) 343 | name_c = OGR_Dr_GetName(driver) 344 | name = get_string(name_c) 345 | 346 | if not ogr_driver_supports_write(name): 347 | continue 348 | 349 | # extensions is a space-delimited list of supported extensions 350 | # for driver 351 | extensions = _get_driver_metadata_item(name, "DMD_EXTENSIONS") 352 | if ( 353 | ext is not None 354 | and extensions is not None 355 | and ext in extensions.lower().split(" 
") 356 | ): 357 | drivers.append(name) 358 | else: 359 | prefix = _get_driver_metadata_item(name, "DMD_CONNECTION_PREFIX") 360 | if prefix is not None and path.startswith(prefix.lower()): 361 | drivers.append(name) 362 | 363 | return drivers 364 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main # just build the sdist & wheel, skip release 7 | tags: 8 | - "v*" 9 | pull_request: # also build on PRs touching files that affect building sdist / wheels 10 | paths: 11 | - ".github/workflows/release.yml" 12 | - "ci/**" 13 | - "MANIFEST.in" 14 | - "pyproject.toml" 15 | - "setup.py" 16 | workflow_dispatch: 17 | 18 | # cancel running jobs on new commit to PR 19 | concurrency: 20 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | build-sdist: 25 | name: Build pyogrio sdist 26 | runs-on: ubuntu-latest 27 | 28 | steps: 29 | - name: Checkout source 30 | uses: actions/checkout@v6 31 | with: 32 | fetch-depth: 0 33 | 34 | - name: Set up Python 35 | uses: actions/setup-python@v6 36 | with: 37 | python-version: "3.x" 38 | 39 | - name: Build a source tarball 40 | run: | 41 | python -m pip install --upgrade pip 42 | python -m pip install build setuptools 43 | python -m build --sdist 44 | 45 | - uses: actions/upload-artifact@v6 46 | with: 47 | name: pyogrio-sdist 48 | path: ./dist/*.tar.gz 49 | retention-days: 5 50 | compression-level: 0 51 | 52 | test-sdist: 53 | name: Test sdist 54 | needs: [build-sdist] 55 | runs-on: ubuntu-latest 56 | container: 57 | image: "ghcr.io/osgeo/gdal:ubuntu-small-3.11.4" 58 | 59 | steps: 60 | - name: Install packages 61 | run: | 62 | apt-get update && apt-get install -y build-essential python3-dev 63 | 64 | - name: Create virtual environment 65 | # install uv and use it to create 
a virtual environment, then add it to 66 | # environment variables so that it is automatically activated and can be 67 | # used for tests below 68 | run: | 69 | curl -LsSf https://astral.sh/uv/install.sh | sh 70 | . $HOME/.local/bin/env 71 | uv venv .venv 72 | echo "VIRTUAL_ENV=.venv" >> $GITHUB_ENV 73 | echo "$PWD/.venv/bin" >> $GITHUB_PATH 74 | 75 | - name: Download sdist from artifacts 76 | uses: actions/download-artifact@v7 77 | with: 78 | name: pyogrio-sdist 79 | path: wheelhouse 80 | 81 | - name: Build from sdist and install test dependencies 82 | shell: bash 83 | run: | 84 | uv pip install --no-cache wheelhouse/*.tar.gz 85 | uv pip install pytest pandas pyproj shapely>=2 86 | uv pip install --no-deps geopandas 87 | uv pip list 88 | 89 | - name: Run tests 90 | shell: bash 91 | # virtual environment is automatically activated 92 | run: | 93 | cd .. 94 | uv run python -c "import pyogrio; print(f'GDAL version: {pyogrio.__gdal_version__}\nGEOS version: {pyogrio.__gdal_geos_version__}')" 95 | uv run python -m pytest --pyargs pyogrio.tests -v 96 | 97 | build-wheels-linux: 98 | name: Build wheels on Linux 99 | runs-on: ${{ matrix.os }} 100 | strategy: 101 | fail-fast: false 102 | matrix: 103 | include: 104 | # use manylinux2014 for older glibc platforms until discontinued 105 | - wheel_name: "pyogrio-wheel-linux-manylinux2014_x86_64" 106 | container: "ci/manylinux2014_x86_64-vcpkg-gdal.Dockerfile" 107 | os: ubuntu-latest 108 | arch: x86_64 109 | 110 | # use manylinux_2_28 for any platforms with glibc>=2.28 111 | - wheel_name: "pyogrio-wheel-linux-manylinux_2_28_x86_64" 112 | container: "ci/manylinux_2_28_x86_64-vcpkg-gdal.Dockerfile" 113 | os: ubuntu-latest 114 | arch: x86_64 115 | 116 | - wheel_name: "pyogrio-wheel-linux-manylinux_2_28_aarch64" 117 | container: "ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile" 118 | os: ubuntu-24.04-arm 119 | arch: aarch64 120 | 121 | steps: 122 | - name: Checkout 123 | uses: actions/checkout@v6 124 | with: 125 | fetch-depth: 0 126 
| 127 | - name: Set up Docker Buildx 128 | id: buildx 129 | uses: docker/setup-buildx-action@v3 130 | with: 131 | install: true 132 | buildkitd-flags: --debug 133 | 134 | - name: Build Docker image with vcpkg and gdal 135 | # using build-push-action (without push) to make use of cache arguments 136 | uses: docker/build-push-action@v6 137 | with: 138 | context: . 139 | file: ${{ matrix.container }} 140 | tags: manylinux-${{ matrix.arch }}-vcpkg-gdal:latest 141 | push: false 142 | load: true 143 | cache-from: type=gha 144 | cache-to: type=gha,mode=max 145 | env: 146 | BUILDKIT_PROGRESS: plain 147 | 148 | - name: Build wheels 149 | uses: pypa/cibuildwheel@v3.2.1 150 | 151 | - uses: actions/upload-artifact@v6 152 | with: 153 | name: ${{ matrix.wheel_name }} 154 | path: ./wheelhouse/*.whl 155 | compression-level: 0 156 | 157 | build-wheels-mac-win: 158 | name: Build wheels on ${{ matrix.os }} (${{ matrix.arch }}) 159 | runs-on: ${{ matrix.os }} 160 | strategy: 161 | fail-fast: false 162 | matrix: 163 | include: 164 | - os: "macos-13" 165 | triplet: "x64-osx-dynamic-release" 166 | arch: x86_64 167 | vcpkg_cache: "/Users/runner/.cache/vcpkg/archives" 168 | vcpkg_logs: "/usr/local/share/vcpkg/buildtrees/**/*.log" 169 | 170 | - os: "macos-13" 171 | triplet: "arm64-osx-dynamic-release" 172 | arch: arm64 173 | vcpkg_cache: "/Users/runner/.cache/vcpkg/archives" 174 | vcpkg_logs: "/usr/local/share/vcpkg/buildtrees/**/*.log" 175 | 176 | - os: "windows-2022" 177 | triplet: "x64-windows-dynamic-release" 178 | arch: AMD64 179 | # windows requires windows-specific paths 180 | vcpkg_cache: "c:\\vcpkg\\installed" 181 | vcpkg_logs: "c:\\vcpkg\\buildtrees\\**\\*.log" 182 | 183 | steps: 184 | - name: Checkout 185 | uses: actions/checkout@v6 186 | with: 187 | fetch-depth: 0 188 | 189 | - name: Cache vcpkg 190 | uses: actions/cache@v5 191 | id: vcpkgcache 192 | with: 193 | path: | 194 | ${{ matrix.vcpkg_cache }} 195 | # bump the last digit to avoid using previous build cache 196 | key: ${{ 
matrix.os }}-${{ matrix.arch }}-vcpkg-gdal3.11.4-cache0 197 | 198 | # MacOS build requires aclocal, which is part of automake, but appears 199 | # to be missing in default image 200 | - name: Reinstall automake 201 | if: runner.os == 'macOS' 202 | run: | 203 | brew reinstall automake 204 | echo $(which aclocal) 205 | 206 | - name: Checkout specific version of vcpkg 207 | shell: bash 208 | run: | 209 | cd $VCPKG_INSTALLATION_ROOT 210 | # on mac the clone is not clean, otherwise git pull fails 211 | git reset --hard 212 | # pull specific commit with desired GDAL version 213 | git pull 214 | git checkout da096fdc67db437bee863ae73c4c12e289f82789 215 | 216 | - name: Install GDAL 217 | env: 218 | VCPKG_DEFAULT_TRIPLET: ${{ matrix.triplet }} 219 | shell: bash 220 | run: | 221 | vcpkg install --overlay-triplets=./ci/custom-triplets --feature-flags="versions,manifests" --x-manifest-root=./ci --x-install-root=$VCPKG_INSTALLATION_ROOT/installed 222 | vcpkg list 223 | 224 | - name: Upload vcpkg build logs 225 | if: ${{ failure() }} 226 | uses: actions/upload-artifact@v6 227 | with: 228 | name: pyogrio-vcpkg-logs-${{ matrix.triplet }} 229 | path: ${{ matrix.vcpkg_logs }} 230 | 231 | - name: Build wheels 232 | uses: pypa/cibuildwheel@v3.2.1 233 | env: 234 | # CIBW needs to know triplet for the correct install path 235 | VCPKG_DEFAULT_TRIPLET: ${{ matrix.triplet }} 236 | CIBW_ARCHS: ${{ matrix.arch }} 237 | 238 | - uses: actions/upload-artifact@v6 239 | with: 240 | name: pyogrio-wheel-${{ matrix.triplet }} 241 | path: ./wheelhouse/*.whl 242 | compression-level: 0 243 | 244 | test-wheels: 245 | name: Test wheels on ${{ matrix.os }} (Python ${{ matrix.python-version }}) 246 | needs: [build-wheels-linux, build-wheels-mac-win] 247 | runs-on: ${{ matrix.os }} 248 | strategy: 249 | fail-fast: false 250 | matrix: 251 | os: 252 | [ 253 | "ubuntu-latest", 254 | "ubuntu-22.04", 255 | "ubuntu-24.04-arm", 256 | "windows-latest", 257 | "macos-13", 258 | "macos-latest", 259 | ] 260 | 
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"] 261 | include: 262 | - os: "ubuntu-latest" 263 | artifact: pyogrio-wheel-linux-manylinux2014_x86_64 264 | - os: "ubuntu-latest" 265 | artifact: pyogrio-wheel-linux-manylinux_2_28_x86_64 266 | - os: "ubuntu-22.04" 267 | artifact: pyogrio-wheel-linux-manylinux_2_28_x86_64 268 | - os: "ubuntu-24.04-arm" 269 | artifact: pyogrio-wheel-linux-manylinux_2_28_aarch64 270 | - os: "windows-latest" 271 | artifact: pyogrio-wheel-x64-windows-dynamic-release 272 | - os: "macos-13" 273 | artifact: pyogrio-wheel-x64-osx-dynamic-release 274 | - os: "macos-latest" 275 | artifact: pyogrio-wheel-arm64-osx-dynamic-release 276 | 277 | steps: 278 | - name: Checkout 279 | uses: actions/checkout@v6 280 | 281 | - name: Set up Python 282 | uses: actions/setup-python@v6 283 | with: 284 | python-version: ${{ matrix.python-version }} 285 | allow-prereleases: true 286 | 287 | - name: Install uv 288 | uses: astral-sh/setup-uv@v7 289 | 290 | - name: Create virtual environment (Linux / MacOS) 291 | # use uv to create a virtual environment, then add it to environment 292 | # variables so that it is automatically activated and can be used for 293 | # tests below 294 | if: ${{ runner.os != 'Windows' }} 295 | run: | 296 | uv venv .venv 297 | echo "VIRTUAL_ENV=.venv" >> $GITHUB_ENV 298 | echo "$PWD/.venv/bin" >> $GITHUB_PATH 299 | 300 | - name: Create virtual environment (Windows) 301 | if: ${{ runner.os == 'Windows' }} 302 | run: | 303 | uv venv .venv 304 | "VIRTUAL_ENV=.venv" | Out-File -FilePath $env:GITHUB_ENV -Append 305 | "$PWD/.venv/Scripts" | Out-File -FilePath $env:GITHUB_PATH -Append 306 | 307 | - name: Download wheels from artifacts 308 | uses: actions/download-artifact@v7 309 | with: 310 | name: ${{ matrix.artifact }} 311 | path: wheelhouse 312 | 313 | - name: Install dependencies and pyogrio wheel 314 | shell: bash 315 | run: | 316 | uv pip install -r ci/requirements-wheel-test.txt 317 | uv pip install --no-cache --pre 
--no-index --find-links wheelhouse pyogrio 318 | # TMP avoid installing nightly pandas given pandas <-> geopandas compatibility issues 319 | # if [ ${{ matrix.python-version }} == "3.14t" ]; then 320 | # uv pip install --pre --upgrade --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas 321 | # fi 322 | if [ ${{ matrix.python-version }} != "3.14t" ]; then 323 | uv pip install --no-deps geopandas 324 | fi 325 | uv pip list 326 | 327 | - name: Run tests 328 | shell: bash 329 | # virtual environment is automatically activated 330 | run: | 331 | cd .. 332 | uv run python -c "import pyogrio; print(f'GDAL version: {pyogrio.__gdal_version__}\nGEOS version: {pyogrio.__gdal_geos_version__}')" 333 | uv run python -m pytest --pyargs pyogrio.tests -v 334 | 335 | publish: 336 | name: Publish pyogrio to GitHub / PyPI 337 | needs: [test-sdist, test-wheels] 338 | runs-on: ubuntu-latest 339 | environment: 340 | name: pypi 341 | url: https://pypi.org/p/pyogrio 342 | permissions: 343 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing to PyPI 344 | contents: write # this permission is required for the Github release action 345 | 346 | # release on every tag 347 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') 348 | 349 | steps: 350 | - uses: actions/download-artifact@v7 351 | with: 352 | pattern: pyogrio-* 353 | path: dist 354 | merge-multiple: true 355 | 356 | - name: Publish distribution to PyPI 357 | uses: pypa/gh-action-pypi-publish@release/v1 358 | 359 | - name: Create GitHub Release 360 | id: create_release 361 | uses: softprops/action-gh-release@v2 362 | with: 363 | name: Version ${{ github.ref_name }} 364 | tag_name: ${{ github.ref }} 365 | draft: false 366 | prerelease: false 367 | files: dist/*.tar.gz 368 | token: ${{ secrets.GITHUB_TOKEN }} 369 | -------------------------------------------------------------------------------- /pyogrio/tests/test_path.py: 
@contextlib.contextmanager
def change_cwd(path):
    """Temporarily change the current working directory to *path*.

    Restores the previous working directory on exit, even if the body raises.
    """
    previous = os.getcwd()
    os.chdir(str(path))
    try:
        yield
    finally:
        # always return to where we started, even on error
        os.chdir(previous)
"/vsiaz/testing/data.gpkg"), 56 | ("adl://testing/data.gpkg", "/vsiadls/testing/data.gpkg"), 57 | ("adls://testing/data.gpkg", "/vsiadls/testing/data.gpkg"), 58 | ("hdfs://testing/data.gpkg", "/vsihdfs/testing/data.gpkg"), 59 | ("webhdfs://testing/data.gpkg", "/vsiwebhdfs/testing/data.gpkg"), 60 | # archives 61 | ("zip://data.zip", "/vsizip/data.zip"), 62 | ("tar://data.tar", "/vsitar/data.tar"), 63 | ("gzip://data.gz", "/vsigzip/data.gz"), 64 | ("tar://./my.tar!my.geojson", "/vsitar/./my.tar/my.geojson"), 65 | ( 66 | "zip://home/data/shapefile.zip!layer.shp", 67 | "/vsizip/home/data/shapefile.zip/layer.shp", 68 | ), 69 | # combined schemes 70 | ("zip+s3://testing/shapefile.zip", "/vsizip/vsis3/testing/shapefile.zip"), 71 | ( 72 | "zip+https://s3.amazonaws.com/testing/shapefile.zip", 73 | "/vsizip/vsicurl/https://s3.amazonaws.com/testing/shapefile.zip", 74 | ), 75 | # auto-prefix zip files 76 | ("test.zip", "/vsizip/test.zip"), 77 | ("/a/b/test.zip", "/vsizip//a/b/test.zip"), 78 | ("a/b/test.zip", "/vsizip/a/b/test.zip"), 79 | # archives using ! 
def test_vsi_path_unknown():
    """An unrecognized URI scheme is passed through unchanged."""
    assert vsi_path("s4://test/data.geojson") == "s4://test/data.geojson"


def test_vsi_handling_read_functions(naturalearth_lowres_vsi):
    """All read entry points resolve zip:// paths before opening them.

    A zip:// path would otherwise fail if not translated to /vsizip/.
    """
    zip_path, _ = naturalearth_lowres_vsi
    uri = "zip://" + str(zip_path)

    raw_result = pyogrio.raw.read(uri)
    assert len(raw_result[2]) == 177

    info = pyogrio.read_info(uri)
    assert info["features"] == 177

    bounds = pyogrio.read_bounds(uri)
    assert len(bounds[0]) == 177
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_path_absolute(data_dir):
    """Absolute paths are accepted both as pathlib.Path and as str."""
    shp = data_dir / "naturalearth_lowres/naturalearth_lowres.shp"

    # pathlib path
    assert len(pyogrio.read_dataframe(shp)) == 177

    # str path
    assert len(pyogrio.read_dataframe(str(shp))) == 177


def test_path_relative(data_dir):
    """Relative paths resolve against the current working directory."""
    rel = "naturalearth_lowres/naturalearth_lowres.shp"

    with change_cwd(data_dir):
        assert len(pyogrio.raw.read(rel)[2]) == 177
        assert pyogrio.read_info(rel)["features"] == 177
        assert len(pyogrio.read_bounds(rel)[0]) == 177


@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_path_relative_dataframe(data_dir):
    """read_dataframe also resolves relative paths against the cwd."""
    with change_cwd(data_dir):
        frame = pyogrio.read_dataframe("naturalearth_lowres/naturalearth_lowres.shp")
        assert len(frame) == 177


def test_uri_local_file(data_dir):
    """file:// URIs to local files are handled by all read entry points."""
    uri = "file://" + str(data_dir / "naturalearth_lowres/naturalearth_lowres.shp")

    assert len(pyogrio.raw.read(uri)[2]) == 177
    assert pyogrio.read_info(uri)["features"] == 177
    assert len(pyogrio.read_bounds(uri)[0]) == 177


@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_uri_local_file_dataframe(data_dir):
    """read_dataframe handles file:// URIs to local files."""
    uri = "file://" + str(data_dir / "naturalearth_lowres/naturalearth_lowres.shp")
    assert len(pyogrio.read_dataframe(uri)) == 177
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_detect_zip_path(tmp_path, naturalearth_lowres):
    """Zip archives are auto-detected and "!" selects files inside them.

    Builds a zip containing one shapefile at the archive root (test1) and a
    second one nested under /a/b/ (test2), then checks the different ways of
    addressing each.
    """
    # create a zipfile with 2 shapefiles in a set of subdirectories
    df = pyogrio.read_dataframe(naturalearth_lowres, where="iso_a3 in ('CAN', 'PER')")
    pyogrio.write_dataframe(df.loc[df.iso_a3 == "CAN"], tmp_path / "test1.shp")
    pyogrio.write_dataframe(df.loc[df.iso_a3 == "PER"], tmp_path / "test2.shp")

    path = tmp_path / "test.zip"
    with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
        for ext in ["dbf", "prj", "shp", "shx"]:
            # without pyproj no .prj sidecar files are written
            if not HAS_PYPROJ and ext == "prj":
                continue

            filename = f"test1.{ext}"
            out.write(tmp_path / filename, filename)

            filename = f"test2.{ext}"
            # FIX: the archive member name must be the nested path of the
            # actual file (e.g. /a/b/test2.shp), so the assertions below can
            # address it; the original had a garbled constant here.
            out.write(tmp_path / filename, f"/a/b/{filename}")

    # defaults to the first shapefile found, at lowest subdirectory
    df = pyogrio.read_dataframe(path)
    assert df.iso_a3[0] == "CAN"

    # selecting a shapefile from within the zip requires the "!" archive
    # specifier
    df = pyogrio.read_dataframe(f"{path}!test1.shp")
    assert df.iso_a3[0] == "CAN"

    df = pyogrio.read_dataframe(f"{path}!/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"

    # specifying zip:// scheme should also work
    df = pyogrio.read_dataframe(f"zip://{path}!/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"

    # specifying /vsizip/ should also work but path must already be in GDAL
    # ready format without the "!" archive specifier
    df = pyogrio.read_dataframe(f"/vsizip/{path}/a/b/test2.shp")
    assert df.iso_a3[0] == "PER"
@pytest.mark.network
def test_url_with_zip():
    """zip+https URLs are read through the combined /vsizip/vsicurl/ scheme."""
    uri = "zip+https://s3.amazonaws.com/fiona-testing/coutwildrnp.zip"

    assert len(pyogrio.raw.read(uri)[2]) == 67
    assert pyogrio.read_info(uri)["features"] == 67
    assert len(pyogrio.read_bounds(uri)[0]) == 67


@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_url_with_zip_dataframe():
    """read_dataframe handles zip+https URLs as well."""
    uri = "zip+https://s3.amazonaws.com/fiona-testing/coutwildrnp.zip"
    assert len(pyogrio.read_dataframe(uri)) == 67


@pytest.fixture
def aws_env_setup(monkeypatch):
    # request anonymous S3 access so no credentials are needed in CI
    monkeypatch.setenv("AWS_NO_SIGN_REQUEST", "YES")


@pytest.mark.network
def test_uri_s3(aws_env_setup):
    """zip+s3 URIs are read through the combined /vsizip/vsis3/ scheme."""
    uri = "zip+s3://fiona-testing/coutwildrnp.zip"

    assert len(pyogrio.raw.read(uri)[2]) == 67
    assert pyogrio.read_info(uri)["features"] == 67
    assert len(pyogrio.read_bounds(uri)[0]) == 67


@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_uri_s3_dataframe(aws_env_setup):
    """read_dataframe handles zip+s3 URIs as well."""
    assert len(pyogrio.read_dataframe("zip+s3://fiona-testing/coutwildrnp.zip")) == 67
def test_get_vsi_path_or_buffer_fixtures_to_string(tmp_path):
    """A plain pathlib.Path input is converted to its string representation."""
    gpkg = tmp_path / "test.gpkg"
    assert get_vsi_path_or_buffer(gpkg) == str(gpkg)
25 | """ 26 | 27 | def __init__(self, error, errno, errmsg): 28 | self.error = error 29 | self.errno = errno 30 | self.errmsg = errmsg 31 | 32 | def __str__(self): 33 | return self.__unicode__() 34 | 35 | def __unicode__(self): 36 | return u"{}".format(self.errmsg) 37 | 38 | @property 39 | def args(self): 40 | return self.error, self.errno, self.errmsg 41 | 42 | 43 | class CPLE_AppDefinedError(CPLE_BaseError): 44 | pass 45 | 46 | 47 | class CPLE_OutOfMemoryError(CPLE_BaseError): 48 | pass 49 | 50 | 51 | class CPLE_FileIOError(CPLE_BaseError): 52 | pass 53 | 54 | 55 | class CPLE_OpenFailedError(CPLE_BaseError): 56 | pass 57 | 58 | 59 | class CPLE_IllegalArgError(CPLE_BaseError): 60 | pass 61 | 62 | 63 | class CPLE_NotSupportedError(CPLE_BaseError): 64 | pass 65 | 66 | 67 | class CPLE_AssertionFailedError(CPLE_BaseError): 68 | pass 69 | 70 | 71 | class CPLE_NoWriteAccessError(CPLE_BaseError): 72 | pass 73 | 74 | 75 | class CPLE_UserInterruptError(CPLE_BaseError): 76 | pass 77 | 78 | 79 | class ObjectNullError(CPLE_BaseError): 80 | pass 81 | 82 | 83 | class CPLE_HttpResponseError(CPLE_BaseError): 84 | pass 85 | 86 | 87 | class CPLE_AWSBucketNotFoundError(CPLE_BaseError): 88 | pass 89 | 90 | 91 | class CPLE_AWSObjectNotFoundError(CPLE_BaseError): 92 | pass 93 | 94 | 95 | class CPLE_AWSAccessDeniedError(CPLE_BaseError): 96 | pass 97 | 98 | 99 | class CPLE_AWSInvalidCredentialsError(CPLE_BaseError): 100 | pass 101 | 102 | 103 | class CPLE_AWSSignatureDoesNotMatchError(CPLE_BaseError): 104 | pass 105 | 106 | 107 | class CPLE_AWSError(CPLE_BaseError): 108 | pass 109 | 110 | 111 | class NullPointerError(CPLE_BaseError): 112 | """ 113 | Returned from check_pointer when a NULL pointer is passed, but no GDAL 114 | error was raised. 115 | """ 116 | pass 117 | 118 | 119 | class CPLError(CPLE_BaseError): 120 | """ 121 | Returned from check_int when a error code is returned, but no GDAL 122 | error was set. 
123 | """ 124 | pass 125 | 126 | 127 | # Map of GDAL error numbers to the Python exceptions. 128 | exception_map = { 129 | 1: CPLE_AppDefinedError, 130 | 2: CPLE_OutOfMemoryError, 131 | 3: CPLE_FileIOError, 132 | 4: CPLE_OpenFailedError, 133 | 5: CPLE_IllegalArgError, 134 | 6: CPLE_NotSupportedError, 135 | 7: CPLE_AssertionFailedError, 136 | 8: CPLE_NoWriteAccessError, 137 | 9: CPLE_UserInterruptError, 138 | 10: ObjectNullError, 139 | 140 | # error numbers 11-16 are introduced in GDAL 2.1. See 141 | # https://github.com/OSGeo/gdal/pull/98. 142 | 11: CPLE_HttpResponseError, 143 | 12: CPLE_AWSBucketNotFoundError, 144 | 13: CPLE_AWSObjectNotFoundError, 145 | 14: CPLE_AWSAccessDeniedError, 146 | 15: CPLE_AWSInvalidCredentialsError, 147 | 16: CPLE_AWSSignatureDoesNotMatchError, 148 | 17: CPLE_AWSError 149 | } 150 | 151 | 152 | cdef inline object check_last_error(): 153 | """Checks if the last GDAL error was a fatal or non-fatal error. 154 | 155 | When a non-fatal error is found, an appropriate exception is raised. 156 | 157 | When a fatal error is found, SystemExit is called. 158 | 159 | Returns 160 | ------- 161 | An Exception, SystemExit, or None 162 | """ 163 | err_type = CPLGetLastErrorType() 164 | err_no = CPLGetLastErrorNo() 165 | err_msg = clean_error_message(CPLGetLastErrorMsg()) 166 | if err_msg == "": 167 | err_msg = "No error message." 168 | 169 | if err_type == CE_Failure: 170 | CPLErrorReset() 171 | return exception_map.get( 172 | err_no, CPLE_BaseError)(err_type, err_no, err_msg) 173 | 174 | if err_type == CE_Fatal: 175 | return SystemExit("Fatal error: {0}".format((err_type, err_no, err_msg))) 176 | 177 | 178 | cdef clean_error_message(const char* err_msg): 179 | """Cleans up error messages from GDAL. 180 | 181 | Parameters 182 | ---------- 183 | err_msg : const char* 184 | The error message to clean up. 
cdef void *check_pointer(void *ptr) except NULL:
    """Check the pointer returned by a GDAL/OGR function.

    If `ptr` is `NULL`, an exception inheriting from CPLE_BaseError is raised.
    When the last error registered by GDAL/OGR was a non-fatal error, the
    exception raised will be customized appropriately. Otherwise a
    NullPointerError is raised.

    Returns
    -------
    The `ptr` input parameter if it is not NULL; otherwise an exception is
    raised.
    """
    if ptr == NULL:
        # Prefer the GDAL-reported error (if any) over a generic one.
        exc = check_last_error()
        if exc:
            raise exc
        else:
            # null pointer was passed, but no error message from GDAL
            raise NullPointerError(-1, -1, "NULL pointer error")

    return ptr


cdef int check_int(int err) except -1:
    """Check the CPLErr (int) value returned by a GDAL/OGR function.

    If `err` is not OGRERR_NONE, an exception inheriting from CPLE_BaseError is raised.
    When the last error registered by GDAL/OGR was a non-fatal error, the
    exception raised will be customized appropriately. Otherwise a CPLError is
    raised.

    Returns
    -------
    The `err` input parameter if it is OGRERR_NONE; otherwise an exception is
    raised.
    """
    if err != OGRERR_NONE:
        # Prefer the GDAL-reported error (if any) over a generic one.
        exc = check_last_error()
        if exc:
            raise exc
        else:
            # no error message from GDAL
            raise CPLError(-1, -1, "Unspecified OGR / GDAL error")

    return err
def _register_error_handler():
    # Install the custom handler process-wide so GDAL failures and warnings
    # are routed through `error_handler` (Python exceptions / warnings)
    # instead of being printed to stderr by GDAL's default handler.
    CPLPushErrorHandler(error_handler)
303 | 304 | Returns 305 | ------- 306 | int 307 | The `err` input parameter if it is OGRERR_NONE. Otherwise an exception is 308 | raised. 309 | 310 | """ 311 | if err != OGRERR_NONE: 312 | if self.error_stack.get(): 313 | self._handle_error_stack(squash_errors) 314 | else: 315 | raise CPLError(CE_Failure, err, "Unspecified OGR / GDAL error") 316 | 317 | return err 318 | 319 | cdef void *check_pointer(self, void *ptr, bint squash_errors) except NULL: 320 | """Check the pointer returned by a GDAL/OGR function. 321 | 322 | If `ptr` is `NULL`, an exception inheriting from CPLE_BaseError is 323 | raised. 324 | When a non-fatal GDAL/OGR error was captured in the error stack, the 325 | exception raised will be customized appropriately. Otherwise, a 326 | NullPointerError is raised. 327 | 328 | Parameters 329 | ---------- 330 | ptr : pointer 331 | The pointer returned by a GDAL/OGR function. 332 | squash_errors : bool 333 | True to squash all errors captured to one error with the exception type of 334 | the last error and all error messages concatenated. 335 | 336 | Returns 337 | ------- 338 | pointer 339 | The `ptr` input parameter if it is not `NULL`. Otherwise an exception is 340 | raised. 
341 | 342 | """ 343 | if ptr == NULL: 344 | if self.error_stack.get(): 345 | self._handle_error_stack(squash_errors) 346 | else: 347 | raise NullPointerError(-1, -1, "NULL pointer error") 348 | 349 | return ptr 350 | 351 | cdef void _handle_error_stack(self, bint squash_errors): 352 | """Handle the errors in `error_stack`.""" 353 | stack = self.error_stack.get() 354 | for error, cause in zip_longest(stack[::-1], stack[::-1][1:]): 355 | if error is not None and cause is not None: 356 | error.__cause__ = cause 357 | 358 | last = stack.pop() 359 | if last is not None: 360 | if squash_errors: 361 | # Concatenate all error messages, and raise a single exception 362 | errmsg = str(last) 363 | inner = last.__cause__ 364 | while inner is not None: 365 | errmsg = f"{errmsg}; {inner}" 366 | inner = inner.__cause__ 367 | 368 | if errmsg == "": 369 | errmsg = "No error message." 370 | 371 | raise type(last)(-1, -1, errmsg) 372 | 373 | raise last 374 | 375 | 376 | cdef void stacking_error_handler( 377 | CPLErr err_class, 378 | int err_no, 379 | const char* err_msg 380 | ) noexcept nogil: 381 | """Custom CPL error handler that adds non-fatal errors to a stack. 382 | 383 | All non-fatal errors (CE_Failure) are not printed to stderr (behaviour 384 | of the default GDAL error handler), but they are converted to python 385 | exceptions and added to a stack, so they can be dealt with afterwards. 386 | 387 | Warnings are converted to Python warnings. 
388 | """ 389 | if err_class == CE_Fatal: 390 | # If the error class is CE_Fatal, we want to have a message issued 391 | # because the CPL support code does an abort() before any exception 392 | # can be generated 393 | CPLDefaultErrorHandler(err_class, err_no, err_msg) 394 | return 395 | 396 | if err_class == CE_Failure: 397 | # For Failures, add them to the error exception stack 398 | with gil: 399 | stack = _ERROR_STACK.get() 400 | stack.append( 401 | exception_map.get(err_no, CPLE_BaseError)( 402 | err_class, err_no, clean_error_message(err_msg) 403 | ), 404 | ) 405 | _ERROR_STACK.set(stack) 406 | 407 | return 408 | 409 | if err_class == CE_Warning: 410 | with gil: 411 | warnings.warn(clean_error_message(err_msg), RuntimeWarning) 412 | return 413 | 414 | # Fall back to the default handler for non-failure messages since 415 | # they won't be translated into exceptions. 416 | CPLDefaultErrorHandler(err_class, err_no, err_msg) 417 | 418 | 419 | @contextlib.contextmanager 420 | def capture_errors(): 421 | """A context manager that captures all GDAL non-fatal errors occurring. 422 | 423 | It adds all errors to a single stack, so it assumes that no more than one 424 | GDAL function is called. 425 | 426 | Yields an ErrorHandler object that can be used to handle the errors 427 | if any were captured. 428 | """ 429 | CPLErrorReset() 430 | _ERROR_STACK.set([]) 431 | 432 | # stacking_error_handler records GDAL errors in the order they occur and 433 | # converts them to exceptions. 434 | CPLPushErrorHandler(stacking_error_handler) 435 | 436 | # Run code in the `with` block. 
437 | yield ErrorHandler(_ERROR_STACK) 438 | 439 | CPLPopErrorHandler() 440 | _ERROR_STACK.set([]) 441 | CPLErrorReset() 442 | -------------------------------------------------------------------------------- /pyogrio/core.py: -------------------------------------------------------------------------------- 1 | """Core functions to interact with OGR data sources.""" 2 | 3 | from pathlib import Path 4 | 5 | from pyogrio._env import GDALEnv 6 | from pyogrio.util import ( 7 | _mask_to_wkb, 8 | _preprocess_options_key_value, 9 | get_vsi_path_or_buffer, 10 | ) 11 | 12 | with GDALEnv(): 13 | from pyogrio._err import _register_error_handler 14 | from pyogrio._io import ogr_list_layers, ogr_read_bounds, ogr_read_info 15 | from pyogrio._ogr import ( 16 | _get_drivers_for_path, 17 | _register_drivers, 18 | get_gdal_config_option as _get_gdal_config_option, 19 | get_gdal_data_path as _get_gdal_data_path, 20 | get_gdal_geos_version, 21 | get_gdal_version, 22 | get_gdal_version_string, 23 | init_gdal_data as _init_gdal_data, 24 | init_proj_data as _init_proj_data, 25 | ogr_list_drivers, 26 | set_gdal_config_options as _set_gdal_config_options, 27 | ) 28 | from pyogrio._vsi import ( 29 | ogr_vsi_listtree, 30 | ogr_vsi_rmtree, 31 | ogr_vsi_unlink, 32 | ) 33 | 34 | _init_gdal_data() 35 | _init_proj_data() 36 | _register_drivers() 37 | _register_error_handler() 38 | 39 | __gdal_version__ = get_gdal_version() 40 | __gdal_version_string__ = get_gdal_version_string() 41 | __gdal_geos_version__ = get_gdal_geos_version() 42 | 43 | 44 | def list_drivers(read=False, write=False): 45 | """List drivers available in GDAL. 46 | 47 | Parameters 48 | ---------- 49 | read: bool, optional (default: False) 50 | If True, will only return drivers that are known to support read capabilities. 51 | write: bool, optional (default: False) 52 | If True, will only return drivers that are known to support write capabilities. 
def detect_write_driver(path):
    """Attempt to infer the driver for a path by extension or prefix.

    Only drivers that support write capabilities will be detected.

    If the path cannot be resolved to a single driver, a ValueError will be
    raised.

    Parameters
    ----------
    path : str
        data source path

    Returns
    -------
    str
        name of the driver, if detected

    """
    # Ask GDAL which write-capable drivers claim this path.
    candidates = _get_drivers_for_path(path)

    if not candidates:
        raise ValueError(
            f"Could not infer driver from path: {path}; please specify driver "
            "explicitly"
        )

    if len(candidates) > 1:
        # Ambiguous extension/prefix: the caller must pick the driver.
        raise ValueError(
            f"Could not infer driver from path: {path}; multiple drivers are "
            f"available for that extension: {', '.join(candidates)}. Please "
            "specify driver explicitly."
        )

    return candidates[0]
def read_bounds(
    path_or_buffer,
    /,
    layer=None,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
    mask=None,
):
    """Read bounds of each feature.

    Reading only the bounds is roughly 2-3x faster than reading the full
    geometry and attributes of a dataset, which makes this useful to assist
    with spatial indexing and partitioning without loading all features
    into memory.

    Parameters
    ----------
    path_or_buffer : str, pathlib.Path, bytes, or file-like
        A dataset path or URI, raw buffer, or file-like object with a read method.
    layer : int or str, optional (default: first layer)
        If an integer is provided, it corresponds to the index of the layer
        with the data source. If a string is provided, it must match the name
        of the layer in the data source. Defaults to first layer in data source.
    skip_features : int, optional (default: 0)
        Number of features to skip from the beginning of the file before returning
        features. Must be less than the total number of features in the file.
    max_features : int, optional (default: None)
        Number of features to read from the file. Must be less than the total
        number of features in the file minus ``skip_features`` (if used).
    where : str, optional (default: None)
        Where clause to filter features in layer by attribute values. Uses a
        restricted form of SQL WHERE clause, defined here:
        http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
        Examples: ``"ISO_A3 = 'CAN'"``, ``"POP_EST > 10000000 AND POP_EST < 100000000"``
    bbox : tuple of (xmin, ymin, xmax, ymax), optional (default: None)
        If present, will be used to filter records whose geometry intersects this
        box. This must be in the same CRS as the dataset. If GEOS is present
        and used by GDAL, only geometries that intersect this bbox will be
        returned; if GEOS is not available or not used by GDAL, all geometries
        with bounding boxes that intersect this bbox will be returned.
    mask : Shapely geometry, optional (default: None)
        If present, will be used to filter records whose geometry intersects
        this geometry. This must be in the same CRS as the dataset. If GEOS is
        present and used by GDAL, only geometries that intersect this geometry
        will be returned; if GEOS is not available or not used by GDAL, all
        geometries with bounding boxes that intersect the bounding box of this
        geometry will be returned. Requires Shapely >= 2.0.
        Cannot be combined with ``bbox`` keyword.

    Returns
    -------
    tuple of (fids, bounds)
        fids are global IDs read from the FID field of the dataset
        bounds are ndarray of shape(4, n) containing ``xmin``, ``ymin``,
        ``xmax``, ``ymax``

    """
    source = get_vsi_path_or_buffer(path_or_buffer)
    # The low-level reader takes an int; None becomes 0 (presumably the
    # sentinel for "no limit" -- see ogr_read_bounds).
    feature_limit = max_features or 0
    return ogr_read_bounds(
        source,
        layer=layer,
        skip_features=skip_features,
        max_features=feature_limit,
        where=where,
        bbox=bbox,
        mask=_mask_to_wkb(mask),
    )
211 | 212 | ``crs``, ``geometry`` and ``total_bounds`` will be ``None`` and ``features`` will be 213 | 0 for a nonspatial layer. 214 | 215 | ``features`` will be -1 if this is an expensive operation for this driver. You can 216 | force it to be calculated using the ``force_feature_count`` parameter. 217 | 218 | ``total_bounds`` is the 2-dimensional extent of all features within the dataset: 219 | (xmin, ymin, xmax, ymax). It will be None if this is an expensive operation for this 220 | driver or if the data source is nonspatial. You can force it to be calculated using 221 | the ``force_total_bounds`` parameter. 222 | 223 | ``fid_column`` is the name of the FID field in the data source, if the FID is 224 | physically stored (e.g. in GPKG). If the FID is just a sequence, ``fid_column`` 225 | will be "" (e.g. ESRI Shapefile). 226 | 227 | ``geometry_name`` is the name of the field where the main geometry is stored in the 228 | data data source, if the field name can by customized (e.g. in GPKG). If no custom 229 | name is supported, ``geometry_name`` will be "" (e.g. ESRI Shapefile). 230 | 231 | ``encoding`` will be ``UTF-8`` if either the native encoding is likely to be 232 | ``UTF-8`` or GDAL can automatically convert from the detected native encoding 233 | to ``UTF-8``. 234 | 235 | Parameters 236 | ---------- 237 | path_or_buffer : str, pathlib.Path, bytes, or file-like 238 | A dataset path or URI, raw buffer, or file-like object with a read method. 239 | layer : str or int, optional 240 | Name or index of layer in data source. Reads the first layer by default. 241 | encoding : str, optional (default: None) 242 | If present, will be used as the encoding for reading string values from 243 | the data source, unless encoding can be inferred directly from the data 244 | source. 245 | force_feature_count : bool, optional (default: False) 246 | True if the feature count should be computed even if it is expensive. 
def set_gdal_config_options(options):
    """Set GDAL configuration options.

    Options are listed here: https://trac.osgeo.org/gdal/wiki/ConfigOptions

    No error is raised if invalid option names are provided.

    These options are applied for an entire session rather than for individual
    functions.

    Parameters
    ----------
    options : dict
        Mapping of GDAL configuration option names to values. ``True`` /
        ``False`` are normalized to ``'ON'`` / ``'OFF'``. Passing ``None``
        as a value clears out a previously set value for that option.

    """
    # Delegate to the low-level binding; normalization of booleans/None
    # happens there.
    _set_gdal_config_options(options)
def _as_vsi_str(path):
    """Normalize a pathlib.Path to a POSIX-style string for the VSI layer."""
    return path.as_posix() if isinstance(path, Path) else path


def get_gdal_data_path():
    """Get the path to the directory GDAL uses to read data files.

    Returns
    -------
    str, or None if data directory was not found

    """
    return _get_gdal_data_path()


def vsi_listtree(path: str | Path, pattern: str | None = None):
    """Recursively list the contents of a VSI directory.

    An fnmatch pattern can be specified to filter the directories/files
    returned.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the VSI directory to be listed.
    pattern : str, optional
        Pattern to filter results, in fnmatch format.

    """
    return ogr_vsi_listtree(_as_vsi_str(path), pattern=pattern)


def vsi_rmtree(path: str | Path):
    """Recursively remove VSI directory.

    Parameters
    ----------
    path : str or pathlib.Path
        path to the VSI directory to be removed.

    """
    ogr_vsi_rmtree(_as_vsi_str(path))
377 | 378 | Parameters 379 | ---------- 380 | path : str or pathlib.Path 381 | path to vsimem file to be removed 382 | 383 | """ 384 | if isinstance(path, Path): 385 | path = path.as_posix() 386 | 387 | ogr_vsi_unlink(path) 388 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## 0.12.1 (2025-11-28) 4 | 5 | ### Bug fixes 6 | 7 | - Fix regression in reading date columns (#616) 8 | - Fix regression in `read_dataframe` when `use_arrow=True` and `columns` is used to filter 9 | out columns of some specific types (#611) 10 | 11 | ## 0.12.0 (2025-11-26) 12 | 13 | ### Potentially breaking changes 14 | 15 | - Return JSON fields (as identified by GDAL) as dicts/lists in `read_dataframe`; 16 | these were previously returned as strings (#556). 17 | - Drop support for GDAL 3.4 and 3.5 (#584). 18 | 19 | ### Improvements 20 | 21 | - Add `datetime_as_string` and `mixed_offsets_as_utc` parameters to `read_dataframe` 22 | to choose the way datetime columns are returned + several fixes when reading and 23 | writing datetimes (#486). 24 | - Add listing of GDAL data types and subtypes to `read_info` (#556). 25 | - Add support to read list fields without arrow (#558, #597). 26 | 27 | ### Bug fixes 28 | 29 | - Fix decode error reading an sqlite file on Windows (#568). 30 | - Fix wrong layer name when creating .gpkg.zip file (#570). 31 | - Fix segfault on providing an invalid value for `layer` in `read_info` (#564). 32 | - Fix error when reading data with ``use_arrow=True`` after having used the 33 | Parquet driver with GDAL>=3.12 (#601). 34 | 35 | ### Packaging 36 | 37 | - Wheels are now available for Python 3.14 (#579). 38 | - The GDAL library included in the wheels is upgraded from 3.10.3 to 3.11.4 (#578). 39 | - Add libkml driver to the wheels for more recent Linux platforms supported 40 | by manylinux_2_28, macOS, and Windows (#561). 
41 | - Add libspatialite to the wheels (#546). 42 | - Minimum required Python version is now 3.10 (#557). 43 | - Initial support for free-threaded Python builds, with the extension module 44 | declaring free-threaded support and wheels for Python 3.13t and 3.14t being 45 | built (#562). 46 | 47 | ## 0.11.1 (2025-08-02) 48 | 49 | ### Bug fixes 50 | 51 | - Compatibility with Shapely >= 2.1 to avoid triggering a deprecation warning at 52 | import (#542). 53 | - Fix reading with a `skip_features` larger than the available number of 54 | features to ensure this consistently returns an empty result for all file 55 | formats (#550). 56 | 57 | ## 0.11.0 (2025-05-08) 58 | 59 | ### Improvements 60 | 61 | - Capture all errors logged by gdal when opening a file fails (#495). 62 | - Add support to read and write ".gpkg.zip" (GDAL >= 3.7), ".shp.zip", and ".shz" 63 | files (#527). 64 | - Compatibility with the string dtype in the upcoming pandas 3.0 release (#493). 65 | 66 | ### Bug fixes 67 | 68 | - Fix WKB writing on big-endian systems (#497). 69 | - Fix writing fids to e.g. GPKG file with `use_arrow` (#511). 70 | - Fix error in `write_dataframe` when writing an empty or all-None object 71 | column with `use_arrow` (#512). 72 | 73 | ### Packaging 74 | 75 | - The GDAL library included in the wheels is upgraded from 3.9.2 to 3.10.3 (#499). 76 | 77 | ## 0.10.0 (2024-09-28) 78 | 79 | ### Improvements 80 | 81 | - Add support to read, write, list, and remove `/vsimem/` files (#457). 82 | - Raise specific error when trying to read non-UTF-8 file with 83 | `use_arrow=True` (#490). 84 | 85 | ### Bug fixes 86 | 87 | - Silence warning from `write_dataframe` with `GeoSeries.notna()` (#435). 88 | - Enable mask & bbox filter when geometry column not read (#431). 89 | - Raise `NotImplementedError` when user attempts to write to an open file handle (#442). 90 | - Prevent seek on read from compressed inputs (#443). 
91 | 92 | ### Packaging 93 | 94 | - For the conda-forge package, change the dependency from `libgdal` to 95 | `libgdal-core`. This package is significantly smaller as it doesn't contain 96 | some large GDAL plugins. Extra plugins can be installed as seperate conda 97 | packages if needed: more info [here](https://gdal.org/download.html#conda). 98 | This also leads to `pyproj` becoming an optional dependency; you will need 99 | to install `pyproj` in order to support spatial reference systems (#452). 100 | - The GDAL library included in the wheels is updated from 3.8.5 to GDAL 3.9.2 (#466). 101 | - pyogrio now requires a minimum version of Python >= 3.9 (#473). 102 | - Wheels are now available for Python 3.13. 103 | 104 | ## 0.9.0 (2024-06-17) 105 | 106 | ### Improvements 107 | 108 | - Add `on_invalid` parameter to `read_dataframe` (#422). 109 | 110 | ### Bug fixes 111 | 112 | - Fixed bug transposing longitude and latitude when writing files with 113 | coordinate transformation from EPSG:4326 (#421). 114 | - Fix bug preventing reading from file paths containing hashes in `read_dataframe` (#412). 115 | 116 | ### Packaging 117 | 118 | - MacOS wheels are now only available for macOS 12+. For older unsupported macOS 119 | versions, pyogrio can still be built from source (requires GDAL to be installed) (#417). 120 | - Remove usage of deprecated `distutils` in `setup.py` (#416). 121 | 122 | ## 0.8.0 (2024-05-06) 123 | 124 | ### Improvements 125 | 126 | - Support for writing based on Arrow as the transfer mechanism of the data 127 | from Python to GDAL (requires GDAL >= 3.8). This is provided through the 128 | new `pyogrio.raw.write_arrow` function, or by using the `use_arrow=True` 129 | option in `pyogrio.write_dataframe` (#314, #346). 130 | - Add support for `fids` filter to `read_arrow` and `open_arrow`, and to 131 | `read_dataframe` with `use_arrow=True` (#304). 
132 | - Add some missing properties to `read_info`, including layer name, geometry name 133 | and FID column name (#365). 134 | - `read_arrow` and `open_arrow` now provide 135 | [GeoArrow-compliant extension metadata](https://geoarrow.org/extension-types.html), 136 | including the CRS, when using GDAL 3.8 or higher (#366). 137 | - The `open_arrow` function can now be used without a `pyarrow` dependency. By 138 | default, it will now return a stream object implementing the 139 | [Arrow PyCapsule Protocol](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) 140 | (i.e. having an `__arrow_c_stream__`method). This object can then be consumed 141 | by your Arrow implementation of choice that supports this protocol. To keep 142 | the previous behaviour of returning a `pyarrow.RecordBatchReader`, specify 143 | `use_pyarrow=True` (#349). 144 | - Warn when reading from a multilayer file without specifying a layer (#362). 145 | - Allow writing to a new in-memory datasource using io.BytesIO object (#397). 146 | 147 | ### Bug fixes 148 | 149 | - Fix error in `write_dataframe` if input has a date column and 150 | non-consecutive index values (#325). 151 | - Fix encoding issues on windows for some formats (e.g. ".csv") and always write ESRI 152 | Shapefiles using UTF-8 by default on all platforms (#361). 153 | - Raise exception in `read_arrow` or `read_dataframe(..., use_arrow=True)` if 154 | a boolean column is detected due to error in GDAL reading boolean values for 155 | FlatGeobuf / GPKG drivers (#335, #387); this has been fixed in GDAL >= 3.8.3. 156 | - Properly ignore fields not listed in `columns` parameter when reading from 157 | the data source not using the Arrow API (#391). 
158 | - Properly handle decoding of ESRI Shapefiles with user-provided `encoding` 159 | option for `read`, `read_dataframe`, and `open_arrow`, and correctly encode 160 | Shapefile field names and text values to the user-provided `encoding` for 161 | `write` and `write_dataframe` (#384). 162 | - Fixed bug preventing reading from bytes or file-like in `read_arrow` / 163 | `open_arrow` (#407). 164 | 165 | ### Packaging 166 | 167 | - The GDAL library included in the wheels is updated from 3.7.2 to GDAL 3.8.5. 168 | 169 | ### Potentially breaking changes 170 | 171 | - Using a `where` expression combined with a list of `columns` that does not include 172 | the column referenced in the expression is not recommended and will now 173 | return results based on driver-dependent behavior, which may include either 174 | returning empty results (even if non-empty results are expected from `where` parameter) 175 | or raise an exception (#391). Previous versions of pyogrio incorrectly 176 | set ignored fields against the data source, allowing it to return non-empty 177 | results in these cases. 178 | 179 | ## 0.7.2 (2023-10-30) 180 | 181 | ### Bug fixes 182 | 183 | - Add `packaging` as a dependency (#320). 184 | - Fix conversion of WKB to geometries with missing values when using 185 | `pandas.ArrowDtype` (#321). 186 | 187 | ## 0.7.1 (2023-10-26) 188 | 189 | ### Bug fixes 190 | 191 | - Fix unspecified dependency on `packaging` (#318). 192 | 193 | ## 0.7.0 (2023-10-25) 194 | 195 | ### Improvements 196 | 197 | - Support reading and writing datetimes with time zones (#253). 198 | - Support writing dataframes without geometry column (#267). 199 | - Calculate feature count by iterating over features if GDAL returns an 200 | unknown count for a data layer (e.g., OSM driver); this may have signficant 201 | performance impacts for some data sources that would otherwise return an 202 | unknown count (count is used in `read_info`, `read`, `read_dataframe`) (#271). 
203 | - Add `arrow_to_pandas_kwargs` parameter to `read_dataframe` + reduce memory usage 204 | with `use_arrow=True` (#273) 205 | - In `read_info`, the result now also contains the `total_bounds` of the layer as well 206 | as some extra `capabilities` of the data source driver (#281). 207 | - Raise error if `read` or `read_dataframe` is called with parameters to read no 208 | columns, geometry, or fids (#280). 209 | - Automatically detect supported driver by extension for all available 210 | write drivers and addition of `detect_write_driver` (#270). 211 | - Addition of `mask` parameter to `open_arrow`, `read`, `read_dataframe`, 212 | and `read_bounds` functions to select only the features in the dataset that 213 | intersect the mask geometry (#285). Note: GDAL < 3.8.0 returns features that 214 | intersect the bounding box of the mask when using the Arrow interface for 215 | some drivers; this has been fixed in GDAL 3.8.0. 216 | - Removed warning when no features are read from the data source (#299). 217 | - Add support for `force_2d=True` with `use_arrow=True` in `read_dataframe` (#300). 218 | 219 | ### Other changes 220 | 221 | - test suite requires Shapely >= 2.0 222 | 223 | - using `skip_features` greater than the number of features available in a data 224 | layer now returns empty arrays for `read` and an empty DataFrame for 225 | `read_dataframe` instead of raising a `ValueError` (#282). 226 | - enabled `skip_features` and `max_features` for `read_arrow` and 227 | `read_dataframe(path, use_arrow=True)`. Note that this incurs overhead 228 | because all features up to the next batch size above `max_features` (or size 229 | of data layer) will be read prior to slicing out the requested range of 230 | features (#282). 231 | - The `use_arrow=True` option can be enabled globally for testing using the 232 | `PYOGRIO_USE_ARROW=1` environment variable (#296). 
233 | 234 | ### Bug fixes 235 | 236 | - Fix int32 overflow when reading int64 columns (#260) 237 | - Fix `fid_as_index=True` doesn't set fid as index using `read_dataframe` with 238 | `use_arrow=True` (#265) 239 | - Fix errors reading OSM data due to invalid feature count and incorrect 240 | reading of OSM layers beyond the first layer (#271) 241 | - Always raise an exception if there is an error when writing a data source 242 | (#284) 243 | 244 | ### Potentially breaking changes 245 | 246 | - In `read_info` (#281): 247 | - the `features` property in the result will now be -1 if calculating the 248 | feature count is an expensive operation for this driver. You can force it to be 249 | calculated using the `force_feature_count` parameter. 250 | - for boolean values in the `capabilities` property, the values will now be 251 | booleans instead of 1 or 0. 252 | 253 | ### Packaging 254 | 255 | - The GDAL library included in the wheels is updated from 3.6.4 to GDAL 3.7.2. 256 | 257 | ## 0.6.0 (2023-04-27) 258 | 259 | ### Improvements 260 | 261 | - Add automatic detection of 3D geometries in `write_dataframe` (#223, #229) 262 | - Add "driver" property to `read_info` result (#224) 263 | - Add support for dataset open options to `read`, `read_dataframe`, and 264 | `read_info` (#233) 265 | - Add support for pandas' nullable data types in `write_dataframe`, or 266 | specifying a mask manually for missing values in `write` (#219) 267 | - Standardized 3-dimensional geometry type labels from "2.5D " to 268 | " Z" for consistency with well-known text (WKT) formats (#234) 269 | - Failure error messages from GDAL are no longer printed to stderr (they were 270 | already translated into Python exceptions as well) (#236). 271 | - Failure and warning error messages from GDAL are no longer printed to 272 | stderr: failures were already translated into Python exceptions 273 | and warning messages are now translated into Python warnings (#236, #242). 
274 | - Add access to low-level pyarrow `RecordBatchReader` via 275 | `pyogrio.raw.open_arrow`, which allows iterating over batches of Arrow 276 | tables (#205). 277 | - Add support for writing dataset and layer metadata (where supported by 278 | driver) to `write` and `write_dataframe`, and add support for reading 279 | dataset and layer metadata in `read_info` (#237). 280 | 281 | ### Packaging 282 | 283 | - The GDAL library included in the wheels is updated from 3.6.2 to GDAL 3.6.4. 284 | - Wheels are now available for Linux aarch64 / arm64. 285 | 286 | ## 0.5.1 (2023-01-26) 287 | 288 | ### Bug fixes 289 | 290 | - Fix memory leak in reading files (#207) 291 | - Fix to only use transactions for writing records when supported by the 292 | driver (#203) 293 | 294 | ## 0.5.0 (2023-01-16) 295 | 296 | ### Major enhancements 297 | 298 | - Support for reading based on Arrow as the transfer mechanism of the data 299 | from GDAL to Python (requires GDAL >= 3.6 and `pyarrow` to be installed). 300 | This can be enabled by passing `use_arrow=True` to `pyogrio.read_dataframe` 301 | (or by using `pyogrio.raw.read_arrow` directly), and provides a further 302 | speed-up (#155, #191). 303 | - Support for appending to an existing data source when supported by GDAL by 304 | passing `append=True` to `pyogrio.write_dataframe` (#197). 305 | 306 | ### Potentially breaking changes 307 | 308 | - In floating point columns, NaN values are now by default written as "null" 309 | instead of NaN, but with an option to control this (pass `nan_as_null=False` 310 | to keep the previous behaviour) (#190). 311 | 312 | ### Improvements 313 | 314 | - It is now possible to pass GDAL's dataset creation options in addition 315 | to layer creation options in `pyogrio.write_dataframe` (#189). 316 | - When specifying a subset of `columns` to read, unnecessary IO or parsing 317 | is now avoided (#195). 
318 | 319 | ### Packaging 320 | 321 | - The GDAL library included in the wheels is updated from 3.4 to GDAL 3.6.2, 322 | and is now built with GEOS and sqlite with rtree support enabled 323 | (which allows writing a spatial index for GeoPackage). 324 | - Wheels are now available for Python 3.11. 325 | - Wheels are now available for MacOS arm64. 326 | 327 | ## 0.4.2 (2022-10-06) 328 | 329 | ### Improvements 330 | 331 | - new `get_gdal_data_path()` utility funtion to check the path of the data 332 | directory detected by GDAL (#160) 333 | 334 | ### Bug fixes 335 | 336 | - register GDAL drivers during initial import of pyogrio (#145) 337 | - support writing "not a time" (NaT) values in a datetime column (#146) 338 | - fixes an error when reading GPKG with bbox filter (#150) 339 | - properly raises error when invalid where clause is used on a GPKG (#150) 340 | - avoid duplicate count of available features (#151) 341 | 342 | ## 0.4.1 (2022-07-25) 343 | 344 | ### Bug fixes 345 | 346 | - use user-provided `encoding` when reading files instead of using default 347 | encoding of data source type (#139) 348 | - always convert curve or surface geometry types to linear geometry types, 349 | such as lines or polygons (#140) 350 | 351 | ## 0.4.0 (2022-06-20) 352 | 353 | ### Major enhancements 354 | 355 | - support for reading from file-like objects and in-memory buffers (#25) 356 | - index of GeoDataFrame created by `read_dataframe` can now optionally be set 357 | to the FID of the features that are read, as `int64` dtype. Note that some 358 | drivers start FID numbering at 0 whereas others start numbering at 1. 
359 | - generalize check for VSI files from `/vsizip` to `/vsi` (#29) 360 | - add dtype for each field to `read_info` (#30) 361 | - support writing empty GeoDataFrames (#38) 362 | - support URI schemes (`zip://`, `s3://`) (#43) 363 | - add keyword to promote mixed singular/multi geometry column to multi geometry type (#56) 364 | - Python wheels built for Windows, MacOS (x86_64), and Linux (x86_64) (#49, #55, #57, #61, #63) 365 | - automatically prefix zip files with URI scheme (#68) 366 | - support use of a sql statement in read_dataframe (#70) 367 | - correctly write geometry type for layer when dataset has multiple geometry types (#82) 368 | - support reading `bool`, `int16`, `float32` into correct dtypes (#83) 369 | - add `geometry_type` to `write_dataframe` to set geometry type for layer (#85) 370 | - Use certifi to set `GDAL_CURL_CA_BUNDLE` / `PROJ_CURL_CA_BUNDLE` defaults (#97) 371 | - automatically detect driver for `.geojson`, `.geojsonl` and `.geojsons` files (#101) 372 | - read DateTime fields with millisecond accuracy (#111) 373 | - support writing object columns with np.nan values (#118) 374 | - add support to write object columns that contain types different than string (#125) 375 | - support writing datetime columns (#120) 376 | - support for writing missing (null) geometries (#59) 377 | 378 | ### Breaking changes 379 | 380 | - `read` now also returns an optional FIDs ndarray in addition to meta, 381 | geometries, and fields; this is the 2nd item in the returned tuple. 382 | 383 | ### Potentially breaking changes 384 | 385 | - Consolidated error handling to better use GDAL error messages and specific 386 | exception classes (#39). Note that this is a breaking change only if you are 387 | relying on specific error classes to be emitted. 
388 | - by default, writing GeoDataFrames with mixed singular and multi geometry 389 | types will automatically promote to the multi type if the driver does not 390 | support mixed geometry types (e.g., `FGB`, though it can write mixed geometry 391 | types if `geometry_type` is set to `"Unknown"`) 392 | - the geometry type of datasets with multiple geometry types will be set to 393 | `"Unknown"` unless overridden using `geometry_type`. Note: 394 | `"Unknown"` may be ignored by some drivers (e.g., shapefile) 395 | 396 | ### Bug fixes 397 | 398 | - use dtype `object` instead of `numpy.object` to eliminate deprecation warnings (#34) 399 | - raise error if layer cannot be opened (#35) 400 | - fix passing gdal creation parameters in `write_dataframe` (#62) 401 | - fix passing kwargs to GDAL in `write_dataframe` (#67) 402 | 403 | ### Changes from 0.4.0a1 404 | 405 | - `layer_geometry_type` introduced in 0.4.0a1 was renamed to `geometry_type` for consistency 406 | 407 | ### Contributors 408 | 409 | People with a “+” by their names contributed a patch for the first time. 410 | 411 | - Brendan Ward 412 | - Joris Van den Bossche 413 | - Martin Fleischmann 414 | - Pieter Roggemans + 415 | - Wei Ji Leong + 416 | 417 | ## 0.3.0 (2021-12-22) 418 | 419 | ### Major enhancements 420 | 421 | - Auto-discovery of `GDAL_VERSION` on Windows, if `gdalinfo.exe` is discoverable 422 | on the `PATH`. 423 | - Addition of `read_bounds` function to read the bounds of each feature. 424 | - Addition of a `fids` keyword to `read` and `read_dataframe` to selectively 425 | read features based on a list of the FIDs. 426 | 427 | ## 0.2.0 (2021-04-02) 428 | 429 | ### Major enhancements 430 | 431 | - initial support for building on Windows. 432 | - Windows: enabled search for GDAL dll directory for Python >= 3.8. 433 | - Addition of `where` parameter to `read` and `read_dataframe` to enable GDAL-compatible 434 | SQL WHERE queries to filter data sources. 
435 | - Addition of `force_2d` parameter to `read` and `read_dataframe` to force 436 | coordinates to always be returned as 2 dimensional, dropping the 3rd dimension 437 | if present. 438 | - Addition of `bbox` parameter to `read` and `read_dataframe` to select only 439 | the features in the dataset that intersect the bbox. 440 | - Addition of `set_gdal_config_options` to set GDAL configuration options and 441 | `get_gdal_config_option` to get a GDAL configuration option. 442 | - Addition of `pyogrio.__gdal_version__` attribute to return GDAL version tuple 443 | and `__gdal_version_string__` to return string version. 444 | - Addition of `list_drivers` function to list all available GDAL drivers. 445 | - Addition of read and write support for `FlatGeobuf` driver when available in GDAL. 446 | 447 | ## 0.1.0 (2020-08-28) 448 | 449 | ### Major enhancements 450 | 451 | - Addition of `list_layers` to list layers in a data source. 452 | - Addition of `read_info` to read basic information for a layer. 453 | - Addition of `read_dataframe` to read from supported file formats (Shapefile, GeoPackage, GeoJSON) into GeoDataFrames. 454 | - Addition of `write_dataframe` to write GeoDataFrames into supported file formats. 455 | --------------------------------------------------------------------------------