├── .github └── workflows │ ├── lint.yml │ ├── release.yml │ └── scripts.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── RELEASE.md ├── examples ├── example-2.R ├── example.csv ├── example.parquet ├── example_metadata.json └── example_metadata_point.json ├── format-specs ├── compatible-parquet.md ├── geoparquet.md ├── parquet-raster.md └── schema.json ├── scripts ├── README.md ├── generate_example.py ├── pyproject.toml ├── test_json_schema.py ├── update_example_schemas.py ├── uv.lock └── write_nz_building_outline.py └── test_data ├── data-linestring-encoding_native.parquet ├── data-linestring-encoding_wkb.parquet ├── data-linestring-wkt.csv ├── data-multilinestring-encoding_native.parquet ├── data-multilinestring-encoding_wkb.parquet ├── data-multilinestring-wkt.csv ├── data-multipoint-encoding_native.parquet ├── data-multipoint-encoding_wkb.parquet ├── data-multipoint-wkt.csv ├── data-multipolygon-encoding_native.parquet ├── data-multipolygon-encoding_wkb.parquet ├── data-multipolygon-wkt.csv ├── data-point-encoding_native.parquet ├── data-point-encoding_wkb.parquet ├── data-point-wkt.csv ├── data-polygon-encoding_native.parquet ├── data-polygon-encoding_wkb.parquet ├── data-polygon-wkt.csv └── generate_test_data.py /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | 15 | - uses: actions/setup-python@v4 16 | with: 17 | python-version: '3.x' 18 | 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | python -m pip install pre-commit 23 | 24 | - name: Run pre-commit 25 | run: pre-commit run --all-files 26 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: 
-------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Draft Release 14 | uses: softprops/action-gh-release@v1 15 | with: 16 | draft: true 17 | generate_release_notes: true 18 | files: | 19 | format-specs/geoparquet.md 20 | format-specs/schema.json 21 | -------------------------------------------------------------------------------- /.github/workflows/scripts.yml: -------------------------------------------------------------------------------- 1 | name: Scripts 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | 11 | test-json-metadata: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: actions/setup-python@v4 16 | with: 17 | python-version: '3.x' 18 | 19 | - uses: astral-sh/setup-uv@v5 20 | 21 | - name: Run scripts 22 | run: | 23 | cd scripts 24 | uv run pytest test_json_schema.py -v 25 | uv run python generate_example.py 26 | uv run python update_example_schemas.py 27 | cd ../examples 28 | # Assert that the version number and file metadata are up to date 29 | # Allow for differences in example.parquet 30 | git restore example.parquet 31 | git diff 32 | test -z "$(git status --porcelain)" 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /scripts/data/ 2 | /scripts/__pycache__/ 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | 4 | # Default to Python 3 5 | default_language_version: 6 | python: python3 7 | 8 | # 
Optionally both commit and push 9 | default_stages: [commit] 10 | 11 | # Regex for files to exclude 12 | # Don't lint the generated JSON metadata files 13 | exclude: "examples/.*json" 14 | 15 | repos: 16 | - repo: https://github.com/pre-commit/pre-commit-hooks 17 | rev: v4.0.1 18 | hooks: 19 | - id: trailing-whitespace 20 | - id: end-of-file-fixer 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Open Geospatial Consortium 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GeoParquet 2 | 3 | ## About 4 | 5 | This repository defines a [specification](https://geoparquet.org/releases/) for how to store geospatial [vector data](https://gisgeography.com/spatial-data-types-vector-raster/) (points, lines, polygons) in [Apache Parquet](https://parquet.apache.org/), a popular columnar storage format for tabular data - see [this vendor explanation](https://databricks.com/glossary/what-is-parquet) for more on what that means. Our goal is to standardize how geospatial data is represented in Parquet to further geospatial interoperability among tools using Parquet today, and hopefully help push forward what's possible with 'cloud-native geospatial' workflows. There are now more than 20 different tools and libraries in 6 different languages that support GeoParquet; you can learn more at [geoparquet.org](https://geoparquet.org). 
6 | 7 | Early contributors include developers from GeoPandas, GeoTrellis, OpenLayers, Vis.gl, Voltron Data, Microsoft, CARTO, Azavea, Planet & Unfolded. 8 | Anyone is welcome to join the project, by building implementations, trying it out, giving feedback through issues and contributing to the spec via pull requests. 9 | Initial work started in the [geo-arrow-spec](https://github.com/geoarrow/geoarrow) GeoPandas repository, and that will continue on 10 | Arrow work in a compatible way, with this specification focused solely on Parquet. We are in the process of becoming an [OGC](https://ogc.org) official 11 | [Standards Working Group](https://portal.ogc.org/files/103450) and are on the path to be a full OGC standard. 12 | 13 | **The latest [stable specification](https://geoparquet.org/releases/v1.1.0/) and [JSON schema](https://geoparquet.org/releases/v1.1.0/schema.json) are published at [geoparquet.org/releases/](https://geoparquet.org/releases/).** 14 | 15 | **The community has agreed on this release, but it is still pending OGC approval.** We are currently working on the process to get it officially OGC approved as soon as possible. The OGC candidate Standard is at [https://docs.ogc.org/DRAFTS/24-013.html](https://docs.ogc.org/DRAFTS/24-013.html). The candidate Standard remains in draft form until it is approved as a Standard by the OGC Membership. Released versions of GeoParquet will not be changed, so if changes are needed for OGC approval, it will be released with a new version number. 16 | 17 | The 'dev' versions of the spec are available in this repo: 18 | 19 | - [**Specification**](format-specs/geoparquet.md) (dev version - not stable, go to the [stable specification](https://geoparquet.org/releases/v1.1.0/) instead) 20 | - [JSON Schema](format-specs/schema.json) 21 | - [Examples](examples/) 22 | 23 | ## Validating GeoParquet 24 | 25 | There are two tools that validate the metadata and the actual data. 
It is recommended to use one of them to ensure any GeoParquet you produce or are given is completely valid according to the specification: 26 | 27 | * **[GPQ](https://github.com/planetlabs/gpq)** - the `validate` command generates a report with `gpq validate example.parquet`. 28 | * **[GDAL/OGR Validation Script](https://gdal.org/drivers/vector/parquet.html#validation-script)** - a Python script that can check compliance with `python3 validate_geoparquet.py --check-data my_geo.parquet` 29 | 30 | ## Goals 31 | 32 | There are a few core goals driving the initial development. 33 | 34 | * **Establish a great geospatial format for workflows that excel with columnar data** - Most data science and 'business intelligence' workflows have been moving 35 | towards columnar data, but current geospatial formats cannot be as efficiently loaded as other data. So we aim to bring geospatial data best practices to one 36 | of the most popular formats, and hopefully establish a good pattern for how to do so. 37 | * **Introduce columnar data formats to the geospatial world** - And most of the geospatial world is not yet benefiting from all the breakthroughs in data analysis 38 | in the broader IT world, so we are excited to enable interesting geospatial analysis with a wider range of tools. 39 | * **Enable interoperability among cloud data warehouses** - BigQuery, Snowflake, Redshift and others all support spatial operations but importing and exporting data 40 | with existing formats can be problematic. All support and often recommend Parquet, so defining a solid GeoParquet can help enable interoperability. 41 | * **Persist geospatial data from Apache Arrow** - GeoParquet is developed in parallel with a [GeoArrow spec](https://github.com/geoarrow/geoarrow), to 42 | enable cross-language in-memory analytics of geospatial information with Arrow. Parquet is already well-supported by Arrow as the key on-disk persistence format. 
43 | 44 | And our broader goal is to innovate with 'cloud-native vector' providing a stable base to try out new ideas for cloud-native & streaming workflows. 45 | 46 | ## Features 47 | 48 | A quick overview of what GeoParquet supports (or at least plans to support). 49 | 50 | * **Multiple spatial reference systems** - Many tools will use GeoParquet for high-performance analysis, so it's important to be able to use data in its 51 | native projection. But we do provide a clear default recommendation to better enable interoperability, giving a clear target for implementations that don't want to 52 | worry about projections. 53 | * **Multiple geometry columns** - There is a default geometry column, but additional geometry columns can be included. 54 | * **Great compression / small files** - Parquet is designed to compress very well, so data benefits by taking up less disk space & being more efficient over 55 | the network. 56 | * **Work with both planar and spherical coordinates** - Most cloud data warehouses support spherical coordinates, and so GeoParquet aims to help persist those 57 | and be clear about what is supported. 58 | * **Great at read-heavy analytic workflows** - Columnar formats enable cheap reading of a subset of columns, and Parquet in particular enables efficient filtering 59 | of chunks based on column statistics, so the format will perform well in a variety of modern analytic workflows. 60 | * **Support for data partitioning** - Parquet has a nice ability to partition data into different files for efficiency, and we aim to enable geospatial partitions. 61 | 62 | It should be noted what GeoParquet is less good for. The biggest one is that it is not a good choice for write-heavy interactions. A row-based format 63 | will work much better if it is backing a system that is constantly updating the data and adding new data. 
64 | 65 | ## Versioning 66 | 67 | As of version 1.0 the specification follows [Semantic Versioning](https://semver.org/), so at that point any breaking change will require the spec to go to 2.0.0. 68 | 69 | ## Current Implementations & Examples 70 | 71 | Examples of GeoParquet files following the current spec can be found in the [examples/](examples/) folder. For information on all the tools and libraries implementing GeoParquet, as well as sample data, see the [implementations section](https://geoparquet.org/#implementations) of the website. 72 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release Procedure 2 | 3 | Please read through the [Release Procedure](https://github.com/opengeospatial/geoparquet/wiki/Release-Process) steps on the wiki. 4 | 5 | After going through a few releases and ideally streamlining the process, you'll likely find those steps documented in this file instead of the wiki. 
6 | -------------------------------------------------------------------------------- /examples/example-2.R: -------------------------------------------------------------------------------- 1 | library(geoarrow) 2 | library(ggplot2) 3 | nc <- sf::read_sf(system.file("shape/nc.shp", package = "sf")) 4 | write_geoparquet(nc, "nc.parquet") 5 | nc_pq<-read_geoparquet("nc.parquet") 6 | 7 | nc_pq %>% 8 | geoarrow_collect_sf()%>% 9 | ggplot() + 10 | geom_sf() 11 | -------------------------------------------------------------------------------- /examples/example.csv: -------------------------------------------------------------------------------- 1 | pop_est,continent,name,iso_a3,gdp_md_est,geometry 2 | 889953.0,Oceania,Fiji,FJI,5496,"MULTIPOLYGON (((180 -16.067132663642447, 180 -16.555216566639196, 179.36414266196414 -16.801354076946883, 178.72505936299711 -17.01204167436804, 178.59683859511713 -16.639150000000004, 179.0966093629971 -16.433984277547403, 179.4135093629971 -16.379054277547404, 180 -16.067132663642447)), ((178.12557 -17.50481, 178.3736 -17.33992, 178.71806 -17.62846, 178.55271 -18.15059, 177.93266000000003 -18.28799, 177.38146 -18.16432, 177.28504 -17.72465, 177.67087 -17.381140000000002, 178.12557 -17.50481)), ((-179.79332010904864 -16.020882256741224, -179.9173693847653 -16.501783135649397, -180 -16.555216566639196, -180 -16.067132663642447, -179.79332010904864 -16.020882256741224)))" 3 | 58005463.0,Africa,Tanzania,TZA,63177,"POLYGON ((33.90371119710453 -0.9500000000000001, 34.07261999999997 -1.0598199999999451, 37.69868999999994 -3.0969899999999484, 37.7669 -3.6771200000000004, 39.20222 -4.67677, 38.74053999999995 -5.9089499999999475, 38.79977000000008 -6.475660000000005, 39.44 -6.839999999999861, 39.47000000000014 -7.099999999999966, 39.19468999999998 -7.703899999999976, 39.25203000000005 -8.00780999999995, 39.18652000000009 -8.48550999999992, 39.53574000000009 -9.112369999999885, 39.94960000000003 -10.098400000000026, 40.316586229110854 
-10.317097752817492, 40.31659000000002 -10.317099999999868, 39.52099999999996 -10.89688000000001, 38.42755659358775 -11.285202325081656, 37.827639999999974 -11.26878999999991, 37.471289999999954 -11.568759999999997, 36.775150994622805 -11.594537448780805, 36.51408165868426 -11.720938002166735, 35.31239790216904 -11.439146416879147, 34.55998904799935 -11.520020033415925, 34.27999999999997 -10.160000000000025, 33.940837724096525 -9.693673841980285, 33.73972000000009 -9.417149999999992, 32.75937544122132 -9.23059905358906, 32.19186486179194 -8.930358981973257, 31.556348097466497 -8.762048841998642, 31.15775133695005 -8.594578747317366, 30.740009731422095 -8.34000593035372, 30.74001549655179 -8.340007419470915, 30.199996779101696 -7.079980970898163, 29.620032179490014 -6.520015150583426, 29.419992710088167 -5.939998874539434, 29.519986606572928 -5.419978936386315, 29.339997592900346 -4.4999834122940925, 29.753512404099865 -4.452389418153302, 30.11632000000003 -4.090120000000013, 30.505539999999996 -3.5685799999999404, 30.752240000000086 -3.3593099999999936, 30.743010000000027 -3.034309999999948, 30.527660000000026 -2.807619999999986, 30.469673645761223 -2.41385475710134, 30.469670000000008 -2.4138299999999617, 30.75830895358311 -2.2872502579883687, 30.816134881317712 -1.6989140763453887, 30.419104852019245 -1.1346591121504161, 30.769860000000108 -1.0145499999999856, 31.866170000000068 -1.0273599999999306, 33.90371119710453 -0.9500000000000001))" 4 | 603253.0,Africa,W. 
Sahara,ESH,907,"POLYGON ((-8.665589565454809 27.656425889592356, -8.665124477564191 27.589479071558227, -8.684399786809053 27.395744126896005, -8.6872936670174 25.881056219988906, -11.96941891117116 25.933352769468268, -11.937224493853321 23.374594224536168, -12.874221564169575 23.284832261645178, -13.118754441774712 22.771220201096256, -12.929101935263532 21.327070624267563, -16.845193650773993 21.33332347257488, -17.06342322434257 20.999752102130827, -17.02042843267577 21.422310288981578, -17.00296179856109 21.420734157796577, -14.750954555713534 21.500600083903663, -14.630832688851072 21.860939846274903, -14.221167771857253 22.31016307218816, -13.891110398809047 23.691009019459305, -12.50096269372537 24.7701162785782, -12.03075883630163 26.030866197203068, -11.718219773800357 26.104091701760623, -11.392554897497007 26.883423977154393, -10.551262579785273 26.990807603456886, -10.189424200877582 26.860944729107405, -9.735343390328879 26.860944729107405, -9.41303748212448 27.088476060488574, -8.794883999049077 27.120696316022507, -8.817828334986672 27.656425889592356, -8.665589565454809 27.656425889592356))" 5 | 37589262.0,North America,Canada,CAN,1736425,"MULTIPOLYGON (((-122.84000000000003 49.000000000000114, -122.97421000000001 49.00253777777778, -124.91024 49.98456, -125.62461 50.416560000000004, -127.43561000000001 50.83061, -127.99276 51.71583, -127.85032 52.32961, -129.12979 52.75538, -129.30523 53.561589999999995, -130.51497 54.28757, -130.53610895273684 54.80275447679924, -130.53611 54.802780000000006, -129.98 55.285000000000004, -130.00778000000003 55.915830000000085, -131.70781 56.55212, -132.73042 57.692890000000006, -133.35556000000003 58.41028000000001, -134.27111000000002 58.86111000000005, -134.94500000000005 59.2705600000001, -135.47583 59.787780000000005, -136.47972000000004 59.46389000000005, -137.4525 58.905, -138.34089 59.562110000000004, -139.03900000000002 60, -140.013 60.27682000000001, -140.99778 60.30639000000001, -140.9925 
66.00003000000001, -140.986 69.712, -140.98598761037601 69.71199839952635, -139.12052 69.47102, -137.54636000000002 68.99002, -136.50358 68.89804, -135.62576 69.31512000000001, -134.41464000000002 69.62743, -132.92925000000002 69.50534, -131.43135999999998 69.94451, -129.79471 70.19369, -129.10773 69.77927000000001, -128.36156 70.01286, -128.13817 70.48384, -127.44712000000001 70.37721, -125.75632000000002 69.48058, -124.42483 70.1584, -124.28968 69.39968999999999, -123.06108 69.56372, -122.6835 69.85553, -121.47226 69.79778, -119.94288 69.37786, -117.60268 69.01128, -116.22643 68.84151, -115.24690000000001 68.90591, -113.89793999999999 68.3989, -115.30489 67.90261000000001, -113.49727 67.68815000000001, -110.798 67.80611999999999, -109.94619 67.98104000000001, -108.8802 67.38144, -107.79239 67.88736, -108.81299 68.31164, -108.16721000000001 68.65392, -106.95 68.7, -106.15 68.8, -105.34282000000002 68.56122, -104.33791000000001 68.018, -103.22115000000001 68.09775, -101.45433 67.64689, -99.90195 67.80566, -98.4432 67.78165, -98.5586 68.40394, -97.66948000000001 68.57864000000001, -96.11991 68.23939, -96.12588 67.29338, -95.48943 68.0907, -94.685 68.06383, -94.23282000000002 69.06903000000001, -95.30408 69.68571, -96.47131 70.08976, -96.39115 71.19482, -95.2088 71.92053, -93.88997 71.76015, -92.87818 71.31869, -91.51964000000001 70.19129000000001, -92.40692000000001 69.69997000000001, -90.5471 69.49766, -90.55151000000001 68.47499, -89.21515 69.25873, -88.01966 68.61508, -88.31748999999999 67.87338000000001, -87.35017 67.19872, -86.30606999999999 67.92146, -85.57664 68.78456, -85.52197 69.88211, -84.10081000000001 69.80539, -82.62258 69.65826, -81.28043000000001 69.16202000000001, -81.22019999999999 68.66567, -81.96436000000001 68.13253, -81.25928 67.59716, -81.38653000000001 67.11078, -83.34456 66.41154, -84.73542 66.2573, -85.76943 66.55833, -86.06760000000001 66.05625, -87.03143 65.21297, -87.32324 64.77563, -88.48296 64.09897000000001, -89.91444 64.03273, 
-90.70398 63.610170000000004, -90.77004000000001 62.960210000000004, -91.93342 62.83508, -93.15698 62.02469000000001, -94.24153 60.89865, -94.62930999999999 60.11021, -94.6846 58.94882, -93.21502000000001 58.78212, -92.76462000000001 57.84571, -92.29702999999999 57.08709, -90.89769 57.28468, -89.03953 56.85172, -88.03978000000001 56.47162, -87.32421 55.999140000000004, -86.07121 55.72383, -85.01181000000001 55.302600000000005, -83.36055 55.24489, -82.27285 55.14832, -82.43620000000001 54.282270000000004, -82.12502 53.27703, -81.40075 52.157880000000006, -79.91289 51.208420000000004, -79.14301 51.533930000000005, -78.60191 52.56208, -79.12421 54.14145, -79.82958 54.66772, -78.22874 55.136449999999996, -77.0956 55.83741, -76.54137 56.53423000000001, -76.62319000000001 57.20263, -77.30226 58.05209, -78.51688 58.80458, -77.33676 59.852610000000006, -77.77272 60.75788000000001, -78.10687 62.31964000000001, -77.41067 62.55053, -75.69621000000001 62.2784, -74.6682 62.181110000000004, -73.83988000000001 62.4438, -72.90853 62.10507, -71.67708 61.52535, -71.37369000000001 61.137170000000005, -69.59042 61.06141, -69.62033 60.221250000000005, -69.28790000000001 58.95736, -68.37455 58.80106, -67.64976 58.21206, -66.20178 58.76731, -65.24517 59.87071, -64.58352000000001 60.33558, -63.804750000000006 59.442600000000006, -62.502359999999996 58.16708, -61.396550000000005 56.96745000000001, -61.798660000000005 56.33945, -60.46853 55.775479999999995, -59.56962 55.20407, -57.97508 54.94549000000001, -57.3332 54.6265, -56.93689 53.780319999999996, -56.15811 53.647490000000005, -55.75632 53.27036, -55.68338 52.146640000000005, -56.40916000000001 51.770700000000005, -57.12691 51.419720000000005, -58.77482 51.0643, -60.03309000000001 50.24277, -61.72366 50.08046, -63.86251 50.29099, -65.36331 50.2982, -66.39905 50.228970000000004, -67.23631 49.511559999999996, -68.51114 49.068360000000006, -69.95362 47.74488, -71.10458 46.82171, -70.25522 46.986059999999995, -68.65 48.3, -66.55243 
49.1331, -65.05626 49.232780000000005, -64.17099 48.74248, -65.11545000000001 48.07085, -64.79854 46.99297, -64.47219 46.238490000000006, -63.17329000000001 45.73902, -61.520720000000004 45.883770000000005, -60.518150000000006 47.00793, -60.448600000000006 46.28264, -59.80287 45.9204, -61.03988 45.265249999999995, -63.254709999999996 44.67014, -64.24656 44.265530000000005, -65.36406000000001 43.54523, -66.1234 43.61867, -66.16173 44.46512, -64.42549 45.29204, -66.02605000000001 45.25931, -67.13741 45.13753, -67.79134 45.70281000000001, -67.79046000000001 47.066359999999996, -68.23444 47.354859999999974, -68.90500000000003 47.18500000000006, -69.237216 47.447781, -69.99997 46.69307, -70.305 45.915, -70.66 45.46, -71.08482000000004 45.30524000000014, -71.405 45.254999999999995, -71.50506 45.0082, -73.34783 45.00738, -74.86700000000002 45.000480000000096, -75.31821000000001 44.81645, -76.375 44.09631, -76.50000000000001 44.01845889375865, -76.82003414580558 43.628784288093755, -77.7378850979577 43.62905558936328, -78.72027991404235 43.62508942318493, -79.17167355011186 43.46633942318426, -79.01 43.27, -78.92 42.964999999999996, -78.93936214874375 42.86361135514798, -80.24744767934794 42.36619985612255, -81.27774654816716 42.209025987306816, -82.4392777167916 41.675105088867326, -82.69008928092023 41.675105088867326, -83.029810146807 41.83279572200598, -83.14199968131264 41.975681057292874, -83.12 42.08, -82.9 42.43, -82.42999999999999 42.980000000000004, -82.13764238150395 43.57108755143997, -82.33776312543114 44.440000000000055, -82.55092464875821 45.34751658790543, -83.59285071484311 45.81689362241252, -83.46955074739469 45.994686387712534, -83.61613094759059 46.116926988299014, -83.89076534700574 46.116926988299014, -84.0918512641615 46.27541860613826, -84.1421195136734 46.51222585711571, -84.33670000000001 46.408770000000004, -84.60490000000004 46.439599999999984, -84.54374874544584 46.538684190449146, -84.77923824739992 46.63710195574902, -84.8760798815149 
46.90008331968238, -85.65236324740341 47.22021881773051, -86.46199083122826 47.553338019392, -87.43979262330028 47.94, -88.37811418328671 48.302917588893706, -89.27291744663665 48.01980825458281, -89.60000000000002 48.010000000000105, -90.83 48.27, -91.64 48.14, -92.61000000000001 48.44999999999993, -93.63087000000002 48.609260000000006, -94.32914000000001 48.67074, -94.64 48.84, -94.81758000000002 49.38905, -95.15609 49.38425000000001, -95.15906950917206 49, -97.2287200000048 49.0007, -100.65000000000003 49.000000000000114, -104.04826000000003 48.99986000000007, -107.05000000000001 49, -110.05000000000001 49, -113 49, -116.04818 49, -117.03121 49, -120 49.000000000000114, -122.84000000000003 49.000000000000114)), ((-83.99367000000001 62.452799999999996, -83.25048 62.91409, -81.87699 62.90458, -81.89825 62.7108, -83.06857000000001 62.159220000000005, -83.77462000000001 62.18231, -83.99367000000001 62.452799999999996)), ((-79.77583312988281 72.8029022216797, -80.87609863281251 73.33318328857422, -80.83388519287111 73.69318389892578, -80.35305786132812 73.75971984863281, -78.06443786621094 73.65193176269531, -76.34 73.10268498995305, -76.25140380859375 72.82638549804688, -77.31443786621094 72.85554504394531, -78.39167022705078 72.87665557861328, -79.4862518310547 72.74220275878906, -79.77583312988281 72.8029022216797)), ((-80.315395 62.08556500000001, -79.92939 62.3856, -79.52002 62.363710000000005, -79.26582 62.158674999999995, -79.65752 61.63308, -80.09956000000001 61.71810000000001, -80.36215 62.016490000000005, -80.315395 62.08556500000001)), ((-93.61275590694046 74.97999726022438, -94.15690873897391 74.59234650338688, -95.60868058956564 74.66686391875176, -96.82093217648455 74.92762319609658, -96.28858740922982 75.37782827422338, -94.85081987178917 75.64721751576089, -93.97774654821797 75.29648956979595, -93.61275590694046 74.97999726022438)), ((-93.84000301794399 77.51999726023455, -94.29560828324529 77.49134267852868, -96.16965410031007 77.55511139597685, 
-96.43630449093614 77.83462921824362, -94.42257727738641 77.820004787905, -93.7206562975659 77.63433136668031, -93.84000301794399 77.51999726023455)), ((-96.75439876990876 78.76581268992702, -95.5592779202946 78.41831452098033, -95.83029496944934 78.05694122996324, -97.30984290239799 77.85059723582181, -98.12428931353404 78.08285696075761, -98.55286780474668 78.45810537384507, -98.63198442258553 78.87193024363837, -97.33723141151266 78.83198436147676, -96.75439876990876 78.76581268992702)), ((-88.15035030796028 74.39230703398503, -89.7647220527584 74.51555532500116, -92.42244096552946 74.83775788034099, -92.76828548864282 75.38681997344214, -92.88990597204175 75.88265534128267, -93.89382402217599 76.31924367950056, -95.9624574450358 76.4413809272224, -97.1213789538295 76.7510777859476, -96.74512285031237 77.16138865834507, -94.68408586299944 77.09787832305837, -93.57392106807313 76.77629588490605, -91.6050231595366 76.7785179714946, -90.7418458727493 76.44959747995681, -90.96966142450802 76.07401317005947, -89.82223792189926 75.84777374948565, -89.18708289259985 75.61016551380762, -87.83827633334965 75.56618886992725, -86.37919226758864 75.4824213731821, -84.78962521029058 75.69920400664653, -82.75344458691006 75.78431509063124, -81.12853084992436 75.71398346628199, -80.05751095245915 75.33684886341591, -79.83393286814837 74.92312734648716, -80.45777075877587 74.65730377877777, -81.94884253612557 74.44245901152432, -83.22889360221143 74.56402781849094, -86.09745235873332 74.41003205026117, -88.15035030796028 74.39230703398503)), ((-111.26444332563088 78.15295604116154, -109.85445187054711 77.99632477488488, -110.18693803591302 77.69701487905034, -112.0511911690585 77.4092288276169, -113.53427893761912 77.73220652944111, -112.7245867582539 78.05105011668196, -111.26444332563088 78.15295604116154)), ((-110.96366065147602 78.8044408230652, -109.6631457182026 78.60197256134565, -110.88131425661892 78.40691986765997, -112.54209143761516 78.4079017198735, 
-112.52589087609164 78.55055451121522, -111.5000103422334 78.8499935981305, -110.96366065147602 78.8044408230652)), ((-55.600218268442056 51.31707469339794, -56.13403581401709 50.68700979267928, -56.795881720595276 49.81230866149089, -56.14310502788433 50.15011749938286, -55.471492275603 49.93581533466846, -55.82240108908096 49.58712860777905, -54.935142584845636 49.3130109726868, -54.473775397343786 49.556691189159125, -53.47654944519137 49.24913890237404, -53.786013759971254 48.516780503933624, -53.08613399922626 48.68780365660358, -52.958648240762216 48.15716421161447, -52.64809872090421 47.53554840757552, -53.069158291218386 46.65549876564492, -53.521456264853 46.61829173439477, -54.17893551290251 46.80706574155698, -53.9618686590605 47.62520701760193, -54.24048214376214 47.752279364607645, -55.40077307801157 46.884993801453135, -55.99748084168583 46.919720363953275, -55.29121904155279 47.38956248635099, -56.250798712780586 47.632545070987376, -57.32522925477708 47.57280711525797, -59.26601518414682 47.60334788674247, -59.419494188053676 47.899453843774886, -58.79658647320744 48.25152537697942, -59.23162451845657 48.52318838153781, -58.3918049790652 49.12558055276418, -57.35868974468606 50.71827403421587, -56.738650071832026 51.28743825947855, -55.87097693543532 51.63209422464921, -55.40697424988659 51.5882726100657, -55.600218268442056 51.31707469339794)), ((-83.88262630891977 65.10961782496354, -82.78757687043883 64.76669302027467, -81.6420137193926 64.45513580998697, -81.55344031444432 63.97960928003714, -80.81736121287886 64.057485663501, -80.10345130076664 63.72598135034862, -80.99101986359572 63.41124603947496, -82.54717810741704 63.65172231714521, -83.10879757356511 64.10187571883971, -84.10041663281388 63.569711819098, -85.52340471061905 63.052379055424055, -85.8667687649824 63.63725291610349, -87.22198320183678 63.54123810490519, -86.35275977247133 64.0358332383707, -86.2248864407651 64.82291697860823, -85.88384782585486 65.7387783881171, 
-85.1613079495499 65.6572846543928, -84.97576371940592 65.21751821558898, -84.4640120104195 65.37177236598022, -83.88262630891977 65.10961782496354)), ((-78.77063859731078 72.35217316353418, -77.8246239895596 72.74961660429098, -75.60584469267573 72.2436784939374, -74.228616095665 71.76714427355789, -74.09914079455771 71.33084015571758, -72.24222571479768 71.55692454699452, -71.20001542833518 70.92001251899718, -68.7860542466849 70.52502370877427, -67.91497046575694 70.12194753689765, -66.9690333726542 69.18608734809182, -68.8051228502006 68.72019847276444, -66.4498660956339 68.06716339789203, -64.86231441919524 67.84753856065159, -63.424934454996794 66.92847321234059, -61.851981370680605 66.86212067327783, -62.16317684594226 66.16025136988962, -63.918444383384184 64.9986685248329, -65.14886023625368 65.42603261988667, -66.72121904159852 66.38804108343219, -68.015016038674 66.26272573512439, -68.1412874009792 65.68978913030439, -67.08964616562342 65.10845510523696, -65.73208045109976 64.64840566675856, -65.32016760930125 64.38273712834605, -64.66940629744968 63.392926744227495, -65.01380388045888 62.67418508569598, -66.27504472519048 62.94509878198612, -68.7831862046927 63.74567007105183, -67.36968075221309 62.88396556258484, -66.32829728866726 62.28007477482201, -66.16556820338015 61.93089712182582, -68.87736650254465 62.330149237712824, -71.02343705919385 62.91070811629588, -72.23537858751902 63.39783600529522, -71.88627844917127 63.67998932560887, -73.37830624051838 64.19396312118384, -74.83441891142263 64.6790756293238, -74.81850257027673 64.38909332951793, -77.70997982452008 64.22954234481678, -78.5559488593542 64.57290639918013, -77.89728105336198 65.30919220647475, -76.01827429879717 65.32696889918314, -73.95979529488268 65.45476471624094, -74.29388342964964 65.81177134872938, -73.94491248238262 66.31057811142666, -72.65116716173942 67.28457550726391, -72.92605994331605 67.72692576768235, -73.31161780464572 68.06943716091287, -74.84330725777684 
68.55462718370127, -76.86910091826672 68.89473562283025, -76.22864905465738 69.14776927354741, -77.28736996123715 69.76954010688321, -78.1686339993266 69.82648753526887, -78.95724219431673 70.16688019477543, -79.49245500356366 69.87180776638884, -81.30547095409176 69.74318512641436, -84.94470618359851 69.96663401964442, -87.06000342481789 70.26000112576538, -88.68171322300148 70.4107412787608, -89.51341956252303 70.76203766548095, -88.46772111688082 71.21818553332132, -89.88815121128755 71.22255219184997, -90.20516028518205 72.23507436796079, -89.436576707705 73.12946421985238, -88.40824154331287 73.53788890247121, -85.82615108920098 73.80381582304518, -86.56217851433412 73.15744700793844, -85.77437130404454 72.53412588163387, -84.85011247428822 73.34027822538708, -82.31559017610101 73.7509508328106, -80.60008765330768 72.71654368762417, -80.74894161652443 72.06190664335072, -78.77063859731078 72.35217316353418)), ((-94.50365759965237 74.13490672473922, -92.42001217321173 74.1000251329422, -90.50979285354263 73.85673248971206, -92.00396521682987 72.96624420845852, -93.19629553910026 72.77199249947334, -94.26904659704726 72.02459625923599, -95.40985551632266 72.06188080513458, -96.03374508338244 72.94027680123183, -96.01826799191102 73.43742991809582, -95.49579342322404 73.86241689726417, -94.50365759965237 74.13490672473922)), ((-122.85492448615902 76.11654287383568, -122.85492529360326 76.11654287383568, -121.15753536032824 76.86450755482828, -119.1039389718211 77.51221995717462, -117.570130784966 77.4983189968881, -116.19858659550738 77.6452867703262, -116.33581336145845 76.87696157501061, -117.10605058476882 76.53003184681911, -118.04041215703819 76.48117178008714, -119.89931758688572 76.053213406062, -121.49999507712648 75.90001862253276, -122.85492448615902 76.11654287383568)), ((-132.71000788443126 54.04000931542356, -131.74998958400334 54.12000438090922, -132.049480347351 52.98462148702447, -131.1790425218266 52.180432847698285, -131.57782954982298 
52.18237071390928, -132.18042842677852 52.639707139692405, -132.54999243231384 53.100014960332146, -133.05461117875552 53.411468817755406, -133.2396644827927 53.851080227262344, -133.1800040417117 54.169975490935315, -132.71000788443126 54.04000931542356)), ((-105.4922891914932 79.30159393992916, -103.52928239623795 79.16534902619163, -100.8251580472688 78.80046173777872, -100.0601918200522 78.32475434031589, -99.67093909381364 77.90754466420744, -101.30394019245301 78.01898489044486, -102.94980872273302 78.34322866486023, -105.17613277873151 78.3803323432458, -104.21042945027713 78.67742015249176, -105.41958045125853 78.91833567983649, -105.4922891914932 79.30159393992916)), ((-123.51000158755119 48.51001089130341, -124.01289078839955 48.37084625914139, -125.65501277733838 48.8250045843385, -125.95499446679275 49.17999583596759, -126.85000443587185 49.53000031188043, -127.02999344954443 49.81499583597008, -128.0593363043662 49.9949590114266, -128.44458410710214 50.539137681676095, -128.35841365625546 50.77064809834371, -127.30858109602994 50.552573554071955, -126.69500097721235 50.400903225295394, -125.7550066738232 50.29501821552935, -125.4150015875588 49.95000051533259, -124.92076818911934 49.475274970083376, -123.92250870832106 49.06248362893581, -123.51000158755119 48.51001089130341)), ((-121.53787999999997 74.44893000000002, -120.10978 74.24135000000001, -117.55563999999993 74.18576999999993, -116.58442000000002 73.89607000000007, -115.51080999999999 73.47519, -116.76793999999995 73.22291999999999, -119.22000000000003 72.51999999999998, -120.45999999999998 71.82000000000005, -120.45999999999998 71.38360179308756, -123.09218999999996 70.90164000000004, -123.62 71.34000000000009, -125.92894873747338 71.86868846301138, -125.49999999999994 72.29226081179502, -124.80729000000002 73.02255999999994, -123.93999999999994 73.68000000000012, -124.91774999999996 74.29275000000013, -121.53787999999997 74.44893000000002)), ((-107.81943000000001 75.84552000000001, 
-106.92893000000001 76.01282, -105.881 75.96940000000001, -105.70498 75.47951, -106.31347000000001 75.00527, -109.70000000000002 74.85000000000001, -112.22306999999999 74.41696, -113.74381 74.39427, -113.87135 74.72029, -111.79420999999999 75.16250000000001, -116.31221 75.04343, -117.7104 75.2222, -116.34602000000001 76.19903000000001, -115.40487 76.47887, -112.59056000000001 76.14134, -110.81422 75.54919, -109.06710000000001 75.47321000000001, -110.49726000000001 76.42982, -109.58109999999999 76.79417, -108.54858999999999 76.67832000000001, -108.21141 76.20168000000001, -107.81943000000001 75.84552000000001)), ((-106.52258999999992 73.07601, -105.40245999999996 72.67259000000007, -104.77484000000004 71.6984000000001, -104.4647599999999 70.99297000000007, -102.78537 70.49776000000003, -100.98077999999992 70.02431999999999, -101.08928999999995 69.58447000000012, -102.73115999999993 69.50402000000003, -102.09329000000002 69.11962000000011, -102.43024000000003 68.75281999999999, -104.24000000000001 68.91000000000008, -105.96000000000004 69.18000000000012, -107.12254000000001 69.11922000000004, -108.99999999999994 68.78000000000003, -111.53414887520017 68.63005915681794, -113.31320000000005 68.53553999999997, -113.85495999999989 69.00744000000009, -115.22000000000003 69.28000000000009, -116.10793999999999 69.16821000000004, -117.34000000000003 69.9600000000001, -116.67472999999995 70.06655, -115.13112000000001 70.23730000000006, -113.72140999999999 70.1923700000001, -112.41610000000003 70.36637999999999, -114.35000000000002 70.60000000000002, -116.48684000000003 70.52044999999998, -117.90480000000002 70.54056000000014, -118.43238000000002 70.90920000000006, -116.11311 71.30917999999997, -117.65567999999996 71.29520000000002, -119.40199000000001 71.55858999999998, -118.56266999999997 72.30785000000003, -117.86641999999995 72.70594000000006, -115.18909000000002 73.31459000000012, -114.16716999999994 73.1214500000001, -114.66633999999999 72.65277000000009, 
-112.44101999999992 72.95540000000011, -111.05039 72.45040000000006, -109.92034999999993 72.96113000000008, -109.00653999999997 72.63335000000001, -108.18834999999996 71.65089, -107.68599 72.0654800000001, -108.39639 73.08953000000008, -107.51645000000002 73.23597999999998, -106.52258999999992 73.07601)), ((-100.43836 72.70588000000001, -101.54 73.36, -100.35642000000001 73.84389, -99.16387 73.63339, -97.38 73.76, -97.12 73.47, -98.05359 72.99052, -96.54 72.56, -96.72000000000001 71.66, -98.35966 71.27284999999999, -99.32286 71.35639, -100.01482 71.73827, -102.5 72.51, -102.48000000000002 72.83000000000001, -100.43836 72.70588000000001)), ((-106.6 73.60000000000001, -105.26 73.64, -104.5 73.42, -105.38000000000001 72.76, -106.94 73.46000000000001, -106.6 73.60000000000001)), ((-98.50000000000001 76.72, -97.735585 76.25656000000001, -97.70441500000001 75.74344, -98.16000000000001 75, -99.80874 74.89744, -100.88365999999999 75.05736, -100.86292000000002 75.64075, -102.50209 75.5638, -102.56552 76.3366, -101.48973 76.30537, -99.98349 76.64634, -98.57699 76.58859, -98.50000000000001 76.72)), ((-96.01644 80.60233000000001, -95.32345000000001 80.90729, -94.29843 80.97727, -94.73542 81.20646000000002, -92.40983999999999 81.25739000000003, -91.13288999999999 80.72345000000003, -89.45000000000002 80.50932203389831, -87.81 80.32000000000001, -87.02000000000001 79.66000000000001, -85.81435 79.3369, -87.18755999999999 79.0393, -89.03535000000001 78.28723, -90.80436 78.21533000000001, -92.87669000000001 78.34333000000001, -93.95116000000002 78.75099, -93.93574 79.11373, -93.14524 79.3801, -94.974 79.37248, -96.07614000000001 79.70502, -96.70972 80.15777, -96.01644 80.60233000000001)), ((-91.58702000000001 81.89429000000001, -90.10000000000001 82.08500000000004, -88.93227 82.11751000000001, -86.97024 82.27961, -85.5 82.65227345805702, -84.260005 82.60000000000001, -83.18 82.32, -82.42 82.86000000000001, -81.1 83.02, -79.30664 83.13056, -76.25 83.17205882352941, 
-75.71878000000001 83.06404000000002, -72.83153 83.23324000000001, -70.66576500000001 83.16978075838284, -68.50000000000001 83.10632151676572, -65.82735 83.02801000000001, -63.68 82.9, -61.85 82.62860000000002, -61.89388 82.36165000000001, -64.334 81.92775000000002, -66.75342 81.72527000000001, -67.65755 81.50141, -65.48031 81.50657000000002, -67.84 80.90000000000003, -69.4697 80.61683000000001, -71.18 79.8, -73.2428 79.63415, -73.88000000000001 79.43016220480206, -76.90773 79.32309000000001, -75.52924 79.19766000000001, -76.22046 79.01907, -75.39345 78.52581, -76.34354 78.18296000000001, -77.88851000000001 77.89991, -78.36269 77.50859000000001, -79.75951 77.20967999999999, -79.61965000000001 76.98336, -77.91089000000001 77.022045, -77.88911 76.777955, -80.56125 76.17812, -83.17439 76.45403, -86.11184 76.29901000000001, -87.60000000000001 76.42, -89.49068 76.47239, -89.6161 76.95213000000001, -87.76739 77.17833, -88.26 77.9, -87.65 77.97022222222223, -84.97634 77.53873, -86.34 78.18, -87.96191999999999 78.37181, -87.15198000000001 78.75867, -85.37868 78.99690000000001, -85.09495 79.34543000000001, -86.50734 79.73624, -86.93179 80.25145, -84.19844 80.20836, -83.40869565217389 80.10000000000001, -81.84823 80.46442, -84.1 80.58, -87.59895 80.51627, -89.36663 80.85569000000001, -90.2 81.26, -91.36786000000001 81.5531, -91.58702000000001 81.89429000000001)), ((-75.21597 67.44425, -75.86588 67.14886, -76.98687 67.09873, -77.2364 67.58809000000001, -76.81166 68.14856, -75.89521 68.28721, -75.11449999999999 68.01035999999999, -75.10333 67.58202, -75.21597 67.44425)), ((-96.25740120380055 69.49003035832177, -95.64768120380054 69.10769035832178, -96.26952120380055 68.75704035832177, -97.61740120380055 69.06003035832177, -98.43180120380055 68.95070035832177, -99.79740120380055 69.40003035832177, -98.91740120380055 69.71003035832177, -98.21826120380055 70.14354035832177, -97.15740120380055 69.86003035832177, -96.55740120380055 69.68003035832177, -96.25740120380055 
69.49003035832177)), ((-64.51912 49.87304, -64.17322 49.95718, -62.858290000000004 49.70641, -61.835584999999995 49.28855, -61.806304999999995 49.10506000000001, -62.29318 49.08717, -63.589259999999996 49.400690000000004, -64.51912 49.87304)), ((-64.01486 47.03601, -63.6645 46.55001, -62.9393 46.41587, -62.012080000000005 46.44314, -62.503910000000005 46.033390000000004, -62.87433 45.968180000000004, -64.14280000000001 46.39265, -64.39261 46.72747, -64.01486 47.03601)))" 6 | 328239523.0,North America,United States of America,USA,21433226,"MULTIPOLYGON (((-122.84000000000003 49.000000000000114, -120 49.000000000000114, -117.03121 49, -116.04818 49, -113 49, -110.05000000000001 49, -107.05000000000001 49, -104.04826000000003 48.99986000000007, -100.65000000000003 49.000000000000114, -97.2287200000048 49.0007, -95.15906950917206 49, -95.15609 49.38425000000001, -94.81758000000002 49.38905, -94.64 48.84, -94.32914000000001 48.67074, -93.63087000000002 48.609260000000006, -92.61000000000001 48.44999999999993, -91.64 48.14, -90.83 48.27, -89.60000000000002 48.010000000000105, -89.27291744663665 48.01980825458281, -88.37811418328671 48.302917588893706, -87.43979262330028 47.94, -86.46199083122826 47.553338019392, -85.65236324740341 47.22021881773051, -84.8760798815149 46.90008331968238, -84.77923824739992 46.63710195574902, -84.54374874544584 46.538684190449146, -84.60490000000004 46.439599999999984, -84.33670000000001 46.408770000000004, -84.1421195136734 46.51222585711571, -84.0918512641615 46.27541860613826, -83.89076534700574 46.116926988299014, -83.61613094759059 46.116926988299014, -83.46955074739469 45.994686387712534, -83.59285071484311 45.81689362241252, -82.55092464875821 45.34751658790543, -82.33776312543114 44.440000000000055, -82.13764238150395 43.57108755143997, -82.42999999999999 42.980000000000004, -82.9 42.43, -83.12 42.08, -83.14199968131264 41.975681057292874, -83.029810146807 41.83279572200598, -82.69008928092023 41.675105088867326, -82.4392777167916 
41.675105088867326, -81.27774654816716 42.209025987306816, -80.24744767934794 42.36619985612255, -78.93936214874375 42.86361135514798, -78.92 42.964999999999996, -79.01 43.27, -79.17167355011186 43.46633942318426, -78.72027991404235 43.62508942318493, -77.7378850979577 43.62905558936328, -76.82003414580558 43.628784288093755, -76.50000000000001 44.01845889375865, -76.375 44.09631, -75.31821000000001 44.81645, -74.86700000000002 45.000480000000096, -73.34783 45.00738, -71.50506 45.0082, -71.405 45.254999999999995, -71.08482000000004 45.30524000000014, -70.66 45.46, -70.305 45.915, -69.99997 46.69307, -69.237216 47.447781, -68.90500000000003 47.18500000000006, -68.23444 47.354859999999974, -67.79046000000001 47.066359999999996, -67.79134 45.70281000000001, -67.13741 45.13753, -66.96465999999998 44.809700000000134, -68.03251999999998 44.325199999999995, -69.05999999999995 43.980000000000075, -70.11616999999995 43.68405000000013, -70.64547563341102 43.09023834896402, -70.81488999999999 42.865299999999934, -70.82499999999999 42.33499999999998, -70.49499999999995 41.80500000000001, -70.07999999999998 41.78000000000003, -70.185 42.145000000000095, -69.88496999999995 41.92283000000009, -69.96502999999996 41.63717000000014, -70.63999999999999 41.47500000000002, -71.12039000000004 41.49445000000014, -71.8599999999999 41.32000000000005, -72.29500000000002 41.26999999999998, -72.87643000000003 41.220650000000035, -73.71000000000004 40.93110235165449, -72.24125999999995 41.119480000000124, -71.94499999999988 40.930000000000064, -73.34499999999997 40.63000000000005, -73.98200000000003 40.62799999999993, -73.95232499999997 40.75075000000004, -74.25671 40.47351000000003, -73.96243999999996 40.42763000000002, -74.17838 39.70925999999997, -74.90603999999996 38.93954000000002, -74.98041 39.19640000000004, -75.20002 39.248450000000105, -75.52805000000001 39.49850000000009, -75.32 38.960000000000036, -75.07183476478986 38.782032230179254, -75.05672999999996 38.40412000000009, 
-75.37746999999996 38.015510000000006, -75.94022999999999 37.21689000000009, -76.03126999999995 37.25659999999999, -75.72204999999985 37.93705000000011, -76.23286999999999 38.319214999999986, -76.35000000000002 39.14999999999998, -76.54272499999996 38.71761500000008, -76.32933000000003 38.08326000000005, -76.98999793161352 38.23999176691336, -76.30161999999996 37.91794499999992, -76.25873999999999 36.96640000000008, -75.97179999999997 36.89726000000002, -75.8680399999999 36.55125000000004, -75.72748999999999 35.55074000000013, -76.36318 34.80854000000011, -77.39763499999992 34.512009999999975, -78.05496 33.92547000000002, -78.55434999999989 33.86133000000012, -79.06067000000002 33.493949999999984, -79.20357000000001 33.158390000000054, -80.30132499999996 32.509355000000085, -80.86498 32.033300000000054, -81.33629000000002 31.44049000000001, -81.49041999999997 30.7299900000001, -81.31371000000001 30.035520000000076, -80.97999999999996 29.18000000000012, -80.53558499999991 28.472129999999993, -80.52999999999986 28.040000000000077, -80.05653928497759 26.88000000000011, -80.08801499999998 26.205764999999985, -80.13155999999992 25.816775000000064, -80.38103000000001 25.20616000000001, -80.67999999999995 25.08000000000004, -81.17212999999998 25.201260000000104, -81.33000000000004 25.639999999999986, -81.70999999999987 25.870000000000005, -82.23999999999995 26.730000000000132, -82.70515 27.495040000000074, -82.85525999999999 27.886240000000043, -82.64999999999998 28.550000000000125, -82.92999999999995 29.10000000000008, -83.70958999999999 29.936560000000043, -84.09999999999997 30.09000000000009, -85.10881999999998 29.636150000000043, -85.28784000000002 29.68612000000013, -85.7731 30.152610000000095, -86.39999999999992 30.40000000000009, -87.53035999999992 30.27433000000002, -88.41781999999995 30.384900000000016, -89.1804899999999 30.315980000000025, -89.5938311784198 30.159994004836847, -89.41373499999997 29.89418999999998, -89.43 29.488639999999975, -89.21767 
29.291080000000022, -89.40822999999995 29.159610000000043, -89.77927999999997 29.307140000000118, -90.15463 29.11743000000007, -90.88022499999994 29.148535000000095, -91.62678499999993 29.677000000000135, -92.49905999999999 29.552300000000002, -93.22636999999997 29.783750000000055, -93.84841999999998 29.71363000000008, -94.69 29.480000000000132, -95.60025999999999 28.738630000000057, -96.59403999999995 28.307480000000055, -97.13999999999987 27.83000000000004, -97.36999999999995 27.380000000000052, -97.37999999999994 26.690000000000055, -97.32999999999998 26.210000000000093, -97.13999999999987 25.870000000000005, -97.52999999999992 25.84000000000009, -98.23999999999995 26.06000000000006, -99.01999999999992 26.37000000000006, -99.30000000000001 26.840000000000032, -99.51999999999992 27.54000000000002, -100.10999999999996 28.110000000000127, -100.45584000000002 28.69612000000012, -100.95759999999996 29.380710000000136, -101.66239999999999 29.77930000000009, -102.48000000000002 29.75999999999999, -103.11000000000001 28.970000000000027, -103.94 29.27000000000004, -104.4569699999999 29.571960000000047, -104.70574999999997 30.121730000000014, -105.03737000000001 30.644019999999955, -105.63159000000002 31.08383000000009, -106.1429 31.399950000000047, -106.50758999999988 31.754520000000014, -108.24000000000001 31.754853718166373, -108.24193999999994 31.342220000000054, -109.03500000000003 31.341940000000136, -111.02361000000002 31.334719999999948, -113.30498 32.03914000000009, -114.815 32.52528000000001, -114.72138999999993 32.72082999999992, -115.99134999999995 32.61239000000012, -117.12775999999985 32.53533999999996, -117.29593769127393 33.04622461520387, -117.94400000000002 33.621236431201396, -118.41060227589753 33.74090922312445, -118.51989482279976 34.02778157757575, -119.08100000000002 34.07799999999992, -119.43884064201671 34.34847717828427, -120.36777999999998 34.447110000000066, -120.62286 34.60854999999998, -120.74432999999999 35.15686000000011, 
-121.71456999999992 36.161529999999914, -122.54746999999998 37.551760000000115, -122.51201000000003 37.78339000000011, -122.95319 38.11371000000008, -123.72720000000004 38.95166000000012, -123.86516999999998 39.76699000000008, -124.39807000000002 40.313199999999995, -124.17885999999999 41.142020000000116, -124.21370000000002 41.99964000000011, -124.53283999999996 42.7659900000001, -124.14213999999998 43.708380000000034, -124.020535 44.615894999999966, -123.89892999999995 45.52341000000007, -124.079635 46.864750000000015, -124.39566999999994 47.72017000000011, -124.68721008300781 48.18443298339855, -124.56610107421875 48.37971496582037, -123.12 48.04000000000002, -122.58735999999993 47.09600000000006, -122.34000000000003 47.360000000000014, -122.5 48.180000000000064, -122.84000000000003 49.000000000000114)), ((-155.40214 20.07975, -155.22452 19.99302, -155.06226 19.8591, -154.80741 19.50871, -154.83147 19.453280000000003, -155.22217 19.23972, -155.54211 19.08348, -155.68817 18.91619, -155.93665 19.05939, -155.90806 19.33888, -156.07347000000001 19.70294, -156.02368 19.81422, -155.85008000000002 19.97729, -155.91907 20.17395, -155.86108000000002 20.267210000000002, -155.78505 20.2487, -155.40214 20.07975)), ((-155.99566000000002 20.76404, -156.07926 20.643970000000003, -156.41445 20.57241, -156.58673 20.783, -156.70167 20.8643, -156.71054999999998 20.92676, -156.61258 21.01249, -156.25711 20.917450000000002, -155.99566000000002 20.76404)), ((-156.75824 21.176840000000002, -156.78933 21.068730000000002, -157.32521 21.097770000000004, -157.25027 21.219579999999997, -156.75824 21.176840000000002)), ((-158.0252 21.71696, -157.94161 21.65272, -157.65283000000002 21.322170000000003, -157.70703 21.26442, -157.7786 21.27729, -158.12667000000002 21.31244, -158.2538 21.53919, -158.29265 21.57912, -158.0252 21.71696)), ((-159.36569 22.21494, -159.34512 21.982000000000003, -159.46372 21.88299, -159.80051 22.065330000000003, -159.74877 22.1382, -159.5962 22.236179999999997, 
-159.36569 22.21494)), ((-166.46779212142462 60.384169826897754, -165.67442969466364 60.29360687930625, -165.57916419173358 59.90998688418753, -166.19277014876727 59.75444082298899, -166.84833736882197 59.941406155020985, -167.45527706609008 60.21306915957936, -166.46779212142462 60.384169826897754)), ((-153.22872941792113 57.96896841087248, -152.56479061583514 57.901427313866996, -152.1411472239064 57.591058661522, -153.00631405333692 57.11584219016593, -154.0050902984581 56.734676825581076, -154.51640275777004 56.99274892844669, -154.67099280497118 57.46119578717253, -153.7627795074415 57.81657461204373, -153.22872941792113 57.96896841087248)), ((-140.98598761037601 69.71199839952635, -140.986 69.712, -140.9925 66.00003000000001, -140.99778 60.30639000000001, -140.013 60.27682000000001, -139.03900000000002 60, -138.34089 59.562110000000004, -137.4525 58.905, -136.47972000000004 59.46389000000005, -135.47583 59.787780000000005, -134.94500000000005 59.2705600000001, -134.27111000000002 58.86111000000005, -133.35556000000003 58.41028000000001, -132.73042 57.692890000000006, -131.70781 56.55212, -130.00778000000003 55.915830000000085, -129.98 55.285000000000004, -130.53611 54.802780000000006, -130.53610895273684 54.80275447679924, -130.5361101894673 54.8027534043494, -131.08581823797215 55.17890615500204, -131.9672114671423 55.497775580459006, -132.2500107428595 56.3699962428974, -133.53918108435641 57.17888743756214, -134.07806292029608 58.12306753196691, -135.0382110322791 58.18771474876394, -136.62806230995471 58.21220937767043, -137.800006279686 58.49999542910376, -139.867787041413 59.53776154238915, -140.825273817133 59.727517401765056, -142.57444353556446 60.08444651960497, -143.9588809948799 59.999180406323376, -145.92555681682788 60.45860972761426, -147.11437394914665 60.884656073644635, -148.22430620012761 60.67298940697714, -148.01806555885082 59.97832896589364, -148.57082251686086 59.914172675203304, -149.72785783587585 59.70565827090553, 
-150.60824337461642 59.368211168039466, -151.7163927886833 59.15582103131993, -151.85943315326722 59.744984035879554, -151.40971900124717 60.72580272077937, -150.3469414947325 61.03358755150987, -150.62111080625704 61.2844249538544, -151.89583919981683 60.727197984451266, -152.57832984109558 60.061657212964235, -154.01917212625764 59.35027944603428, -153.28751135965317 58.86472768821977, -154.23249243875847 58.14637360293051, -155.3074914215102 57.727794501366304, -156.30833472392305 57.422774359763594, -156.55609737854638 56.97998484967064, -158.11721655986779 56.46360809999419, -158.43332129619714 55.99415355083852, -159.60332739971741 55.56668610292013, -160.28971961163427 55.643580634170576, -161.22304765525777 55.364734605523495, -162.23776607974105 55.02418691672011, -163.06944658104638 54.68973704692712, -164.78556922102717 54.40417308208214, -164.94222632552007 54.57222483989534, -163.84833960676565 55.03943146424609, -162.87000139061595 55.34804311789321, -161.80417497459607 55.89498647727038, -160.5636047027812 56.00805451112501, -160.07055986228448 56.41805532492873, -158.6844429189195 57.01667511659787, -158.46109737855403 57.21692129172885, -157.72277035218391 57.57000051536306, -157.55027442119362 58.328326321030204, -157.04167497457698 58.91888458926172, -158.19473120830554 58.61580231386978, -158.51721798402303 58.78778148053732, -159.0586061269288 58.42418610293163, -159.71166704001737 58.93139028587632, -159.98128882550017 58.572549140041644, -160.3552711659965 59.07112335879361, -161.3550034251151 58.670837714260756, -161.96889360252632 58.67166453717738, -162.05498653872465 59.26692536074745, -161.8741707021354 59.63362132429057, -162.51805904849212 59.98972361921386, -163.8183414378202 59.79805573184336, -164.66221757714652 60.26748444278263, -165.3463877024748 60.50749563256238, -165.3508318756519 61.073895168697504, -166.12137915755602 61.50001902937623, -165.73445187077058 62.074996853271784, -164.9191786367179 62.63307648380794, 
-164.56250790103934 63.14637848576302, -163.75333248599708 63.21944896102377, -163.06722449445786 63.05945872664802, -162.26055538638175 63.54193573674115, -161.53444983624863 63.455816962326764, -160.7725066803211 63.766108100023246, -160.9583351308426 64.22279857040274, -161.51806840721218 64.40278758407527, -160.77777767641481 64.78860382756642, -161.39192623598765 64.77723501246231, -162.4530500966689 64.55944468856819, -162.75778601789415 64.33860545516876, -163.54639421288428 64.5591604681905, -164.96082984114514 64.44694509546883, -166.42528825586447 64.68667206487066, -166.8450042389391 65.08889557561452, -168.11056006576715 65.66999705673675, -166.70527116602193 66.08831777613938, -164.47470964257548 66.5766600612975, -163.65251176659564 66.5766600612975, -163.78860165103623 66.07720734319668, -161.67777442121013 66.11611969671242, -162.48971452538004 66.73556509059512, -163.71971696679117 67.11639455837008, -164.4309913808565 67.61633820257777, -165.39028683170673 68.04277212185025, -166.76444068099605 68.35887685817966, -166.20470740462667 68.88303091091615, -164.43081051334346 68.91553538682774, -163.1686136546145 69.37111481391287, -162.930566169262 69.85806183539927, -161.90889726463556 70.33332998318764, -160.93479651593367 70.44768992784958, -159.03917578838713 70.89164215766891, -158.11972286683394 70.82472117785102, -156.58082455139808 71.35776357694175, -155.06779029032427 71.14777639432367, -154.3441652089412 70.69640859647018, -153.9000062733926 70.88998851183567, -152.21000606993528 70.82999217394485, -152.27000240782613 70.60000621202983, -150.73999243874448 70.43001658800569, -149.7200030181675 70.53001048449045, -147.61336157935705 70.2140349392418, -145.68998980022533 70.12000967068673, -144.9200109590764 69.98999176704046, -143.58944618042523 70.15251414659832, -142.07251034871348 69.85193817817265, -140.98598752156073 69.71199839952635, -140.98598761037601 69.71199839952635)), ((-171.73165686753944 63.782515367275934, -171.1144335602453 
63.59219106714495, -170.4911124339407 63.694975490973505, -169.6825054596536 63.43111562769119, -168.6894394603007 63.297506212000556, -168.77194088445466 63.18859813094544, -169.5294398672051 62.97693146427792, -170.29055620021595 63.194437567794424, -170.67138566799093 63.3758218451389, -171.55306311753873 63.317789211675105, -171.79111060289122 63.40584585230046, -171.73165686753944 63.782515367275934)))" 7 | -------------------------------------------------------------------------------- /examples/example.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/examples/example.parquet -------------------------------------------------------------------------------- /examples/example_metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo": { 3 | "columns": { 4 | "geometry": { 5 | "bbox": [ 6 | -180.0, 7 | -18.288, 8 | 180.0, 9 | 83.2332 10 | ], 11 | "covering": { 12 | "bbox": { 13 | "xmax": [ 14 | "bbox", 15 | "xmax" 16 | ], 17 | "xmin": [ 18 | "bbox", 19 | "xmin" 20 | ], 21 | "ymax": [ 22 | "bbox", 23 | "ymax" 24 | ], 25 | "ymin": [ 26 | "bbox", 27 | "ymin" 28 | ] 29 | } 30 | }, 31 | "crs": { 32 | "$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", 33 | "area": "World.", 34 | "bbox": { 35 | "east_longitude": 180, 36 | "north_latitude": 90, 37 | "south_latitude": -90, 38 | "west_longitude": -180 39 | }, 40 | "coordinate_system": { 41 | "axis": [ 42 | { 43 | "abbreviation": "Lon", 44 | "direction": "east", 45 | "name": "Geodetic longitude", 46 | "unit": "degree" 47 | }, 48 | { 49 | "abbreviation": "Lat", 50 | "direction": "north", 51 | "name": "Geodetic latitude", 52 | "unit": "degree" 53 | } 54 | ], 55 | "subtype": "ellipsoidal" 56 | }, 57 | "datum_ensemble": { 58 | "accuracy": "2.0", 59 | "ellipsoid": { 60 | "inverse_flattening": 298.257223563, 61 | "name": "WGS 
84", 62 | "semi_major_axis": 6378137 63 | }, 64 | "id": { 65 | "authority": "EPSG", 66 | "code": 6326 67 | }, 68 | "members": [ 69 | { 70 | "id": { 71 | "authority": "EPSG", 72 | "code": 1166 73 | }, 74 | "name": "World Geodetic System 1984 (Transit)" 75 | }, 76 | { 77 | "id": { 78 | "authority": "EPSG", 79 | "code": 1152 80 | }, 81 | "name": "World Geodetic System 1984 (G730)" 82 | }, 83 | { 84 | "id": { 85 | "authority": "EPSG", 86 | "code": 1153 87 | }, 88 | "name": "World Geodetic System 1984 (G873)" 89 | }, 90 | { 91 | "id": { 92 | "authority": "EPSG", 93 | "code": 1154 94 | }, 95 | "name": "World Geodetic System 1984 (G1150)" 96 | }, 97 | { 98 | "id": { 99 | "authority": "EPSG", 100 | "code": 1155 101 | }, 102 | "name": "World Geodetic System 1984 (G1674)" 103 | }, 104 | { 105 | "id": { 106 | "authority": "EPSG", 107 | "code": 1156 108 | }, 109 | "name": "World Geodetic System 1984 (G1762)" 110 | }, 111 | { 112 | "id": { 113 | "authority": "EPSG", 114 | "code": 1309 115 | }, 116 | "name": "World Geodetic System 1984 (G2139)" 117 | }, 118 | { 119 | "id": { 120 | "authority": "EPSG", 121 | "code": 1383 122 | }, 123 | "name": "World Geodetic System 1984 (G2296)" 124 | } 125 | ], 126 | "name": "World Geodetic System 1984 ensemble" 127 | }, 128 | "id": { 129 | "authority": "OGC", 130 | "code": "CRS84" 131 | }, 132 | "name": "WGS 84 (CRS84)", 133 | "scope": "Not known.", 134 | "type": "GeographicCRS" 135 | }, 136 | "edges": "planar", 137 | "encoding": "WKB", 138 | "geometry_types": [ 139 | "Polygon", 140 | "MultiPolygon" 141 | ] 142 | } 143 | }, 144 | "primary_column": "geometry", 145 | "version": "1.2.0-dev" 146 | } 147 | } -------------------------------------------------------------------------------- /examples/example_metadata_point.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo": { 3 | "columns": { 4 | "geometry": { 5 | "encoding": "point", 6 | "geometry_types": [ 7 | "Point" 8 | ] 9 | } 10 | }, 11 | 
"primary_column": "geometry", 12 | "version": "1.2.0-dev" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /format-specs/compatible-parquet.md: -------------------------------------------------------------------------------- 1 | # Parquet Geospatial Compatibility 2 | 3 | The goal of GeoParquet is that every tool producing Parquet that includes geospatial data uses the official [metadata defined in the GeoParquet spec](./schema.json) to achieve true interoperability. This document represents a set of guidelines for those who would like to produce geospatial Parquet data but are using tools that are not yet fully implementing GeoParquet metadata. It is meant to be used just for the interim time when only some tools properly produce GeoParquet, to enable data producers to support the growing ecosystem. 4 | 5 | To be clear, this is *only* recommended for those who are using tools that don't yet produce valid GeoParquet, and we encourage advocating to the creators of tools you are using to implement the GeoParquet spec. Feel free to [start a discussion](https://github.com/opengeospatial/geoparquet/discussions) to raise awareness of tools that should ideally support GeoParquet - the community can likely help in encouraging an implementation. 6 | 7 | The core idea behind these compatibility guidelines is that tools and libraries that read GeoParquet will be able to parse these geospatial compatible Parquet files, to make it easy to get data into the GeoParquet ecosystem. But it is only recommended for those tools and libraries to produce valid GeoParquet, following [Postel's Law](https://en.wikipedia.org/wiki/Robustness_principle) of being liberal in what you accept but conservative in what you send - if you are authoring a tool to write GeoParquet please do not give users the option to create these parquet files. 
8 | 9 | ## Compatibility Guidelines 10 | 11 | The core idea of the compatibility guidelines is to have the output match the defaults of the official spec as closely as possible, so it is very easy for tools to simply add the appropriate Parquet metadata and create valid GeoParquet. The guidelines are as follows: 12 | 13 | * The geometry column should be named either `"geometry"` or `"geography"`. 14 | 15 | * The geometry column should be a `BYTE_ARRAY` with Well Known Binary (WKB) used to define the geometries, as defined in the [encoding](./geoparquet.md#encoding) section of the GeoParquet spec. Alternatively, the geometry column can be stored according to the Point, MultiPoint, MultiLineString, or MultiPolygon memory layouts with separated (struct) coordinates as specified in the [GeoArrow format](https://geoarrow.org/format). 16 | 17 | * All data is stored in longitude, latitude based on the WGS84 datum, as defined as the default in the [crs](./geoparquet.md#crs) section of the GeoParquet spec. 18 | 19 | * If the column is named `"geometry"` then the [edges](./geoparquet.md#edges) must be `"planar"`. If the column is named `"geography"` then the edges must be `"spherical"`. 20 | 21 | ### Data Reader Assumptions 22 | 23 | The above are the key recommendations a data producer should follow. Any implemented reader will need to make the following assumptions when reading one of these columns, unless the user supplies additional information that they are aware of: 24 | 25 | * The geometry_types value is an empty array, signaling the geometry type is not known and the reader should make no assumptions about the types, as defined in the [geometry_types](./geoparquet.md#geometry_types) section of the spec. 26 | 27 | * Any CRS-aware reader should assume that the CRS is OGC:CRS84 as explained in the [crs](./geoparquet.md#crs) section of the spec. 
(Or it could assume it is EPSG:4326 but overriding the axis order to assume longitude latitude as explained in the [Coordinate axis order](./geoparquet.md#coordinate-axis-order) section). 28 | 29 | * No assertions are made on the winding order, the default of the [orientation](./geoparquet.md#orientation) section of the spec. 30 | 31 | * The edge definition is based on whether the column is named `"geometry"` or `"geography"`. 32 | 33 | ## Data Reader Implementation Considerations 34 | 35 | Reading this non-compliant geospatial data from Parquet should ideally work with no user intervention if the producer followed all the guidelines and named their geometry column 'geometry'. Readers can optionally support user input (in whatever manner works for the tool / library) to provide hints for metadata that is not inline with the lowest common denominator compatibility. This would include things like letting the user supply a geometry column name (i.e., something other than 'geometry' or 'geography'), using Well Known Text (WKT) in a `STRING` column instead of WKB, providing a more specific geometry_type value, or providing other enhanced metadata (specifying the CRS, the winding order, the edges, the bbox or the epoch). 36 | 37 | We strongly advise against creating a reader that can only understand these geospatial compatible files - all readers should start by looking at the metadata specified by GeoParquet and only fall back on these compatibility techniques if the metadata is not present. A reader that could not read GeoParquet but would read compatible geodata would have no idea if there was in fact metadata, and thus could easily decrease interoperability. 38 | 39 | ## Data Producer Considerations 40 | 41 | As mentioned above, we strongly recommend trying to find tools that will produce valid GeoParquet. If the tool you are working with does not support it directly then there are [many tools](https://geoparquet.org) that can help you. 
We only recommend this route if there is no way to create valid GeoParquet metadata. This will enable those who have readers that understand GeoParquet and these compatible files to turn it into valid GeoParquet themselves. 42 | 43 | We recommend sticking to the core recommendations as much as possible - naming the geometry column 'geometry', using WKB, and storing data as long, lat. If your data must be formatted differently, fewer readers will be able to work with it. If you do go that route, be sure to make it clear in all your documentation where things are different. 44 | -------------------------------------------------------------------------------- /format-specs/geoparquet.md: -------------------------------------------------------------------------------- 1 | # GeoParquet Specification 2 | 3 | ## Overview 4 | 5 | The [Apache Parquet](https://parquet.apache.org/) provides a standardized open-source columnar storage format. The GeoParquet specification defines how geospatial data should be stored in parquet format, including the representation of geometries and the required additional metadata. 6 | 7 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). 8 | 9 | ## Version and schema 10 | 11 | This is version 1.2.0-dev of the GeoParquet specification. See the [JSON Schema](schema.json) to validate metadata for this version. See [Version Compatibility](#version-compatibility) for details on version compatibility guarantees. 12 | 13 | ## Geometry columns 14 | 15 | Geometry columns MUST be encoded as [WKB](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary) or using the single-geometry type encodings based on the [GeoArrow](https://geoarrow.org/) specification. 16 | 17 | See the [encoding](#encoding) section below for more details. 
18 | 19 | ### Nesting 20 | 21 | Geometry columns MUST be at the root of the schema. In practice, this means that when writing to GeoParquet from another format, geometries cannot be contained in complex or nested types such as structs, lists, arrays, or map types. 22 | 23 | ### Repetition 24 | 25 | The repetition for all geometry columns MUST be "required" (exactly one) or "optional" (zero or one). A geometry column MUST NOT be repeated. A GeoParquet file MAY have multiple geometry columns with different names, but those geometry columns cannot be repeated. 26 | 27 | ## Metadata 28 | 29 | GeoParquet files include additional metadata at two levels: 30 | 31 | 1. File metadata indicating things like the version of this specification used 32 | 2. Column metadata with additional metadata for each geometry column 33 | 34 | A GeoParquet file MUST include a `geo` key in the Parquet metadata (see [`FileMetaData::key_value_metadata`](https://github.com/apache/parquet-format#metadata)). The value of this key MUST be a JSON-encoded UTF-8 string representing the file and column metadata that validates against the [GeoParquet metadata schema](schema.json). The file and column metadata fields are described below. 35 | 36 | ## File metadata 37 | 38 | | Field Name | Type | Description | 39 | | ------------------ | ------ | -------------------------------------------------------------------- | 40 | | version | string | **REQUIRED.** The version identifier for the GeoParquet specification. | 41 | | primary_column | string | **REQUIRED.** The name of the "primary" geometry column. In cases where a GeoParquet file contains multiple geometry columns, the primary geometry may be used by default in geospatial operations. | 42 | | columns | object\<string, [Column Metadata](#column-metadata)> | **REQUIRED.** Metadata about geometry columns. Each key is the name of a geometry column in the table. | 43 | 44 | At this level, additional implementation-specific fields (e.g. 
library name) MAY be present, and readers should be robust in ignoring those. 45 | 46 | ### Column metadata 47 | 48 | Each geometry column in the dataset MUST be included in the `columns` field above with the following content, keyed by the column name: 49 | 50 | | Field Name | Type | Description | 51 | | -------------- | ------------ | ----------- | 52 | | encoding | string | **REQUIRED.** Name of the geometry encoding format. Currently `"WKB"`, `"point"`, `"linestring"`, `"polygon"`, `"multipoint"`, `"multilinestring"`, and `"multipolygon"` are supported. | 53 | | geometry_types | \[string] | **REQUIRED.** The geometry types of all geometries, or an empty array if they are not known. | 54 | | crs | object\|null | [PROJJSON](https://proj.org/specifications/projjson.html) object representing the Coordinate Reference System (CRS) of the geometry. If the field is not provided, the default CRS is [OGC:CRS84](https://www.opengis.net/def/crs/OGC/1.3/CRS84), which means the data in this column must be stored in longitude, latitude based on the WGS84 datum. | 55 | | orientation | string | Winding order of exterior ring of polygons. If present must be `"counterclockwise"`; interior rings are wound in opposite order. If absent, no assertions are made regarding the winding order. | 56 | | edges | string | Name of the coordinate system for the edges. Must be one of `"planar"` or `"spherical"`. The default value is `"planar"`. | 57 | | bbox | \[number] | Bounding Box of the geometries in the file, formatted according to [RFC 7946, section 5](https://tools.ietf.org/html/rfc7946#section-5). | 58 | | epoch | number | Coordinate epoch in case of a dynamic CRS, expressed as a decimal year. | 59 | | covering | object | Object containing bounding box column names to help accelerate spatial data retrieval | 60 | 61 | 62 | #### crs 63 | 64 | The Coordinate Reference System (CRS) is an optional parameter for each geometry column defined in GeoParquet format. 
65 | 66 | The CRS MUST be provided in [PROJJSON](https://proj.org/specifications/projjson.html) format, which is a JSON encoding of [WKT2:2019 / ISO-19162:2019](https://docs.opengeospatial.org/is/18-010r7/18-010r7.html), which itself implements the model of [OGC Topic 2: Referencing by coordinates abstract specification / ISO-19111:2019](http://docs.opengeospatial.org/as/18-005r4/18-005r4.html). Apart from the difference of encodings, the semantics are intended to match WKT2:2019, and a CRS in one encoding can generally be represented in the other. 67 | 68 | If the `crs` key does not exist, all coordinates in the geometries MUST use longitude, latitude based on the WGS84 datum, and the default value is [OGC:CRS84](https://www.opengis.net/def/crs/OGC/1.3/CRS84) for CRS-aware implementations. Note that a missing `crs` key has different meaning than a `crs` key set to `null` (see below). 69 | 70 | [OGC:CRS84](https://www.opengis.net/def/crs/OGC/1.3/CRS84) is equivalent to the well-known [EPSG:4326](https://epsg.org/crs_4326/WGS-84.html) but changes the axis from latitude-longitude to longitude-latitude. 71 | 72 | Due to the large number of CRSes available and the difficulty of implementing all of them, we expect that a number of implementations will start without support for the optional `crs` field. Users are recommended to store their data in longitude, latitude (OGC:CRS84 or not including the `crs` field) for it to work with the widest number of tools. Data that are more appropriately represented in particular projections may use an alternate coordinate reference system. We expect many tools will support alternate CRSes, but encourage users to check to ensure their chosen tool supports their chosen CRS. 73 | 74 | See below for additional details about representing or identifying OGC:CRS84. 75 | 76 | The value of this key may be explicitly set to `null` to indicate that there is no CRS assigned to this column (CRS is undefined or unknown). 
77 | 78 | #### epoch 79 | 80 | In a dynamic CRS, coordinates of a point on the surface of the Earth may change with time. To be unambiguous, the coordinates must always be qualified with the epoch at which they are valid. 81 | 82 | The optional `epoch` field allows to specify this in case the `crs` field defines a dynamic CRS. The coordinate epoch is expressed as a decimal year (e.g. `2021.47`). Currently, this specification only supports an epoch per column (and not per geometry). 83 | 84 | #### encoding 85 | 86 | This is the memory layout used to encode geometries in the geometry column. 87 | Supported values: 88 | 89 | - `"WKB"` 90 | - one of `"point"`, `"linestring"`, `"polygon"`, `"multipoint"`, `"multilinestring"`, `"multipolygon"` 91 | 92 | ##### WKB 93 | 94 | The preferred option for maximum portability is `"WKB"`, signifying [Well Known Binary](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary). This SHOULD be the ["OpenGIS® Implementation Specification for Geographic information - Simple feature access - Part 1: Common architecture"](https://portal.ogc.org/files/?artifact_id=18241) WKB representation (using codes for 3D geometry types in the \[1001,1007\] range). This encoding is also consistent with the one defined in the ["ISO/IEC 13249-3:2016 (Information technology - Database languages - SQL multimedia and application packages - Part 3: Spatial)"](https://www.iso.org/standard/60343.html) standard. 95 | 96 | Note that the current version of the spec only allows for a subset of WKB: 2D or 3D geometries of the standard geometry types (the Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection geometry types). This means that M values or non-linear geometry types are not yet supported. 97 | 98 | WKB geometry columns MUST be stored using the `BYTE_ARRAY` parquet type. 
99 | 100 | Implementation note: when using WKB encoding with the ecosystem of Arrow libraries, Parquet types such as `BYTE_ARRAY` might not be directly accessible. Instead, the corresponding Arrow data type can be `Arrow::Type::BINARY` (for arrays whose elements can be indexed through a 32-bit index) or `Arrow::Type::LARGE_BINARY` (64-bit index). It is recommended that GeoParquet readers are compatible with both data types, and writers preferably use `Arrow::Type::BINARY` (thus limiting to row groups with content smaller than 2 GB) for larger compatibility. 101 | 102 | ##### Native encodings (based on GeoArrow) 103 | 104 | Using the single-geometry type encodings (i.e., `"point"`, `"linestring"`, `"polygon"`, `"multipoint"`, `"multilinestring"`, `"multipolygon"`) may provide better performance and enable readers to leverage more features of the Parquet format to accelerate geospatial queries (e.g., row group-level min/max statistics). These encodings correspond to the extension name suffix in the [GeoArrow metadata specification for extension names](https://geoarrow.org/extension-types#extension-names) to signify the memory layout used by the geometry column. GeoParquet uses the separated (struct) representation of coordinates for single-geometry type encodings because this encoding results in useful column statistics when row groups and/or files contain related features. 105 | 106 | The actual coordinates of the geometries MUST be stored as native numbers, i.e. using 107 | the `DOUBLE` parquet type in a (repeated) group of fields (exact repetition depending 108 | on the geometry type). 
109 | 110 | For the `"point"` geometry type, this results in a struct of two fields for x 111 | and y coordinates (in case of 2D geometries): 112 | 113 | ``` 114 | // "point" geometry column as simple field with two child fields for x and y 115 | optional group geometry { 116 | required double x; 117 | required double y; 118 | } 119 | ``` 120 | 121 | For the other geometry types, those x and y coordinate values MUST be embedded 122 | in repeated groups (`LIST` logical parquet type). For example, for the 123 | `"multipolygon"` geometry type: 124 | 125 | ``` 126 | // "multipolygon" geometry column with multiple levels of nesting 127 | optional group geometry (List) { 128 | // the parts of the MultiPolygon 129 | repeated group list { 130 | required group element (List) { 131 | // the rings of one Polygon 132 | repeated group list { 133 | required group element (List) { 134 | // the list of coordinates of one ring 135 | repeated group list { 136 | required group element { 137 | required double x; 138 | required double y; 139 | } 140 | } 141 | } 142 | } 143 | } 144 | } 145 | } 146 | ``` 147 | 148 | There MUST NOT be any null values in the child fields and the x/y/z coordinate 149 | fields. Only the outer optional "geometry" group is allowed to have nulls (i.e 150 | representing a missing geometry). This MAY be indicated in the Parquet schema by 151 | using `required` group elements, as in the example above, but this is not 152 | required and `optional` fields are permitted (as long as the data itself does 153 | not contain any nulls). 154 | 155 | #### Coordinate axis order 156 | 157 | The axis order of the coordinates in WKB stored in a GeoParquet follows the de facto standard for axis order in WKB and is therefore always (x, y) where x is easting or longitude and y is northing or latitude. This ordering explicitly overrides the axis order as specified in the CRS. 
This follows the precedent of [GeoPackage](https://geopackage.org), see the [note in their spec](https://www.geopackage.org/spec130/#gpb_spec). 158 | 159 | #### geometry_types 160 | 161 | This field captures the geometry types of the geometries in the column, when known. Accepted geometry types are: `"Point"`, `"LineString"`, `"Polygon"`, `"MultiPoint"`, `"MultiLineString"`, `"MultiPolygon"`, `"GeometryCollection"`. 162 | 163 | In addition, the following rules are used: 164 | 165 | - In case of 3D geometries, a `" Z"` suffix gets added (e.g. `["Point Z"]`). 166 | - A list of multiple values indicates that multiple geometry types are present (e.g. `["Polygon", "MultiPolygon"]`). 167 | - An empty array explicitly signals that the geometry types are not known. 168 | - The geometry types in the list must be unique (e.g. `["Point", "Point"]` is not valid). 169 | 170 | It is expected that this field is strictly correct. For example, if having both polygons and multipolygons, it is not sufficient to specify `["MultiPolygon"]`, but it is expected to specify `["Polygon", "MultiPolygon"]`. Or if having 3D points, it is not sufficient to specify `["Point"]`, but it is expected to list `["Point Z"]`. 171 | 172 | #### orientation 173 | 174 | This attribute indicates the winding order of polygons. The only available value is `"counterclockwise"`. All vertices of exterior polygon rings MUST be ordered in the counterclockwise direction and all interior rings MUST be ordered in the clockwise direction. 175 | 176 | If no value is set, no assertions are made about winding order or consistency of such between exterior and interior rings or between individual geometries within a dataset. Readers are responsible for verifying and if necessary re-ordering vertices as required for their analytical representation. 177 | 178 | Writers are encouraged but not required to set `orientation="counterclockwise"` for portability of the data within the broader ecosystem. 
179 | 180 | It is RECOMMENDED to always set the orientation (to counterclockwise) if `edges` is `"spherical"` (see below). 181 | 182 | #### edges 183 | 184 | This attribute indicates how to interpret the edges of the geometries: whether the line between two points is a straight cartesian line or the shortest line on the sphere (geodesic line). Available values are: 185 | - `"planar"`: use a flat cartesian coordinate system. 186 | - `"spherical"`: use a spherical coordinate system and radius derived from the spheroid defined by the coordinate reference system. 187 | 188 | If no value is set, the default value to assume is `"planar"`. 189 | 190 | Note if `edges` is `"spherical"` then it is RECOMMENDED that `orientation` is always ensured to be `"counterclockwise"`. If it is not set, it is not clear how polygons should be interpreted within spherical coordinate systems, which can lead to major analytical errors if interpreted incorrectly. In this case, software will typically interpret the rings of a polygon such that it encloses at most half of the sphere (i.e. the smallest polygon of both ways it could be interpreted). But the specification itself does not make any guarantee about this. 191 | 192 | #### bbox 193 | 194 | Bounding boxes are used to help define the spatial extent of each geometry column. Implementations of this schema may choose to use those bounding boxes to filter partitions (files) of a partitioned dataset. 195 | 196 | The bbox, if specified, MUST be encoded with an array representing the range of values for each dimension in the geometry coordinates. For geometries in a geographic coordinate reference system, longitude and latitude values are listed for the most southwesterly coordinate followed by values for the most northeasterly coordinate. 
This follows the GeoJSON specification ([RFC 7946, section 5](https://tools.ietf.org/html/rfc7946#section-5)), which also describes how to represent the bbox for a set of geometries that cross the antimeridian. 197 | 198 | For non-geographic coordinate reference systems, the items in the bbox are minimum values for each dimension followed by maximum values for each dimension. For example, given geometries that have coordinates with two dimensions, the bbox would have the form `[<xmin>, <ymin>, <xmax>, <ymax>]`. For three dimensions, the bbox would have the form `[<xmin>, <ymin>, <zmin>, <xmax>, <ymax>, <zmax>]`. 199 | 200 | The bbox values MUST be in the same coordinate reference system as the geometry. 201 | 202 | #### covering 203 | 204 | The covering field specifies optional simplified representations of each geometry. The keys of the "covering" object MUST be a supported encoding. Currently the only supported encoding is "bbox" which specifies the names of [bounding box columns](#bounding-box-columns). 205 | 206 | Example: 207 | ``` 208 | "covering": { 209 | "bbox": { 210 | "xmin": ["bbox", "xmin"], 211 | "ymin": ["bbox", "ymin"], 212 | "xmax": ["bbox", "xmax"], 213 | "ymax": ["bbox", "ymax"] 214 | } 215 | } 216 | ``` 217 | 218 | ##### bbox covering encoding 219 | 220 | Including a per-row bounding box can be useful for accelerating spatial queries by allowing consumers to inspect row group and page index bounding box summary statistics. Furthermore, a bounding box may be used to avoid complex spatial operations by first checking for bounding box overlaps. This field captures the column name and fields containing the bounding box of the geometry for every row. 221 | 222 | The format of the `bbox` encoding is `{"xmin": ["column_name", "xmin"], "ymin": ["column_name", "ymin"], "xmax": ["column_name", "xmax"], "ymax": ["column_name", "ymax"]}`. The arrays represent Parquet schema paths for nested groups. In this example, `column_name` is a Parquet group with fields `xmin`, `ymin`, `xmax`, `ymax`. 
The value in `column_name` MUST exist in the Parquet file and meet the criteria in the [Bounding Box Column](#bounding-box-columns) definition. In order to constrain this value to a single bounding group field, the second item in each element MUST be `xmin`, `ymin`, etc. All values MUST use the same column name. 223 | 224 | The value specified in this field should not be confused with the top-level [`bbox`](#bbox) field which contains the single bounding box of this geometry over the whole GeoParquet file. 225 | 226 | Note: This technique to use the bounding box to improve spatial queries does not apply to geometries that cross the antimeridian. Such geometries are unsupported by this method. 227 | 228 | ### Bounding Box Columns 229 | 230 | A bounding box column MUST be a Parquet group field with 4 or 6 child fields representing the geometry's coordinate range. For two-dimensional data, the child fields MUST be named `xmin`, `ymin`, `xmax`, and `ymax` and MUST be ordered in this same way. As with the top-level [`bbox`](#bbox) column, the values follow the GeoJSON specification (RFC 7946, section 5), which also describes how to represent the bbox for geometries that cross the antimeridian. For three dimensions the additional fields `zmin` and `zmax` MAY be present but are not required. If `zmin` is present then `zmax` MUST be present and vice versa. If `zmin` and `zmax` are present, the ordering of the child fields MUST be `xmin`, `ymin`, `zmin`, `xmax`, `ymax`, `zmax`. The fields MUST be of Parquet type `FLOAT` or `DOUBLE` and all columns MUST use the same type. The repetition of a bounding box column MUST match the geometry column's [repetition](#repetition). A row MUST contain a bounding box value if and only if the row contains a geometry value. In cases where the geometry is optional and a row does not contain a geometry value, the row MUST NOT contain a bounding box value. 231 | 232 | The bounding box column MUST be at the root of the schema. 
The bounding box column MUST NOT be nested in a group. 233 | 234 | ### Additional information 235 | 236 | #### Feature identifiers 237 | 238 | If you are using GeoParquet to serialize geospatial data with feature identifiers, it is RECOMMENDED that you create your own [file key/value metadata](https://github.com/apache/parquet-format#metadata) to indicate the column that represents this identifier. As an example, GDAL writes additional metadata using the `gdal:schema` key including information about feature identifiers and other information outside the scope of the GeoParquet specification. 239 | 240 | ### OGC:CRS84 details 241 | 242 | The PROJJSON object for OGC:CRS84 is: 243 | 244 | ```json 245 | { 246 | "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json", 247 | "type": "GeographicCRS", 248 | "name": "WGS 84 longitude-latitude", 249 | "datum": { 250 | "type": "GeodeticReferenceFrame", 251 | "name": "World Geodetic System 1984", 252 | "ellipsoid": { 253 | "name": "WGS 84", 254 | "semi_major_axis": 6378137, 255 | "inverse_flattening": 298.257223563 256 | } 257 | }, 258 | "coordinate_system": { 259 | "subtype": "ellipsoidal", 260 | "axis": [ 261 | { 262 | "name": "Geodetic longitude", 263 | "abbreviation": "Lon", 264 | "direction": "east", 265 | "unit": "degree" 266 | }, 267 | { 268 | "name": "Geodetic latitude", 269 | "abbreviation": "Lat", 270 | "direction": "north", 271 | "unit": "degree" 272 | } 273 | ] 274 | }, 275 | "id": { 276 | "authority": "OGC", 277 | "code": "CRS84" 278 | } 279 | } 280 | ``` 281 | 282 | For implementations that operate entirely with longitude, latitude coordinates and are not CRS-aware or do not have easy access to CRS-aware libraries that can fully parse PROJJSON, it may be possible to infer that coordinates conform to the OGC:CRS84 CRS based on elements of the `crs` field. For simplicity, Javascript object dot notation is used to refer to nested elements. 
283 | 284 | The CRS is likely equivalent to OGC:CRS84 for a GeoParquet file if the `id` element is present: 285 | 286 | * `id.authority` = `"OGC"` and `id.code` = `"CRS84"` 287 | * `id.authority` = `"EPSG"` and `id.code` = `4326` (due to longitude, latitude ordering in this specification) 288 | 289 | It is reasonable for implementations to require that one of the above `id` elements is present and skip further tests to determine if the CRS is functionally equivalent with OGC:CRS84. 290 | 291 | Note: EPSG:4326 and OGC:CRS84 are equivalent with respect to this specification because this specification specifically overrides the coordinate axis order in the `crs` to be longitude-latitude. 292 | 293 | ## Version Compatibility 294 | 295 | GeoParquet version numbers follow [SemVer](https://semver.org), meaning patch releases are for bugfixes, minor releases represent backwards compatible changes, and major releases represent breaking changes. For this specification, a backwards compatible change means that a file written with the older specification will always be compatible with the newer specification. Minor releases are also guaranteed to be forward compatible up to the next major release. Forward compatibility means that an implementation that is only aware of the older specification MUST be able to correctly interpret data written according to the newer specification, OR recognize that it cannot correctly interpret that data. 296 | 297 | Examples of a forward compatible change include: 298 | - Adding a new field in File or Column Metadata that can be ignored without changing the interpretation of the data (e.g. an index that can improve query performance). 299 | - Adding a new option to an existing field. 300 | 301 | Examples of a breaking change include: 302 | - Adding a new field that cannot be ignored without changing the interpretation of the data. 303 | - Changing the default value in an existing field. 304 | - Changing the meaning of an existing field value. 
305 | 306 | In order to support data written according to future minor releases, implementations of this specification: 307 | - SHOULD NOT reject metadata with unknown fields. 308 | - SHOULD explicitly validate all field values they rely on (e.g. an implementation of the 1.0.0 specification should validate encoding = "WKB" even though it is the only allowed value, as new options might be added). 309 | 310 | ## File Extension 311 | 312 | It is RECOMMENDED to use `.parquet` as the file extension for a GeoParquet file. This provides the best interoperability with existing Parquet tools. The file extension `.geoparquet` SHOULD NOT be used. 313 | 314 | ## Media Type 315 | 316 | If a [media type](https://en.wikipedia.org/wiki/Media_type) (formerly: MIME type) is used, a GeoParquet file MUST use [application/vnd.apache.parquet](https://www.iana.org/assignments/media-types/application/vnd.apache.parquet) as the media type. 317 | -------------------------------------------------------------------------------- /format-specs/parquet-raster.md: -------------------------------------------------------------------------------- 1 | # [Work in Progress] Parquet Raster Specification 2 | 3 | *This spec should currently be considered of 'alpha' quality - there is much more to work out. We'd love early implementations and feedback, but please don't expect anywhere near the robustness of the main GeoParquet specification. You can track progress and contribute with the [raster tag in issues](https://github.com/opengeospatial/geoparquet/issues).* 4 | 5 | ## Overview 6 | 7 | [Apache Parquet](https://parquet.apache.org/) provides a standardized open-source columnar storage format that also natively supports geo types (i.e., Geometry and Geography types). The Parquet Raster specification defines how geo-referenced raster imagery data (abbr., raster) should be stored in parquet format, including the representation of raster and the required additional metadata. 
8 | 9 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). 10 | 11 | ## Raster columns 12 | 13 | A raster column MUST be stored as a `struct` type column in parquet files. The `struct` type MUST contain the fields defined in the following table. The `raster` column MUST be stored in the root level of the parquet file. 14 | 15 | Each raster column must also have a corresponding `Geometry` or `Geography` type column, stored in the top level of the parquet file. The name of the geometry column MUST be specified in the `geometry` field of the raster column metadata. 16 | 17 | ## Raster Representation 18 | 19 | The raster data model is largely inspired by the WKB raster encoding of PostGIS but extracts the raster metadata out of the binary encoding. It always uses the little-endian byte order for the raster data. 
20 | 21 | ### Raster value 22 | 23 | A raster value is composed of the following components: 24 | 25 | | Field | Parquet Physical Type | Parquet Logical Type | Description | 26 | |--------------|-----------------------|----------------------|-------------------------------------------------------------------------| 27 | | `crs` | `BYTE_ARRAY` | UTF8 | **OPTIONAL.** The coordinate reference system of the raster | 28 | | `scale_x` | `DOUBLE` | | **REQUIRED.** The scale factor of the raster in X direction | 29 | | `scale_y` | `DOUBLE` | | **REQUIRED.** The scale factor of the raster in Y direction | 30 | | `ip_x` | `DOUBLE` | | **REQUIRED.** The X coordinate of the upper left corner of the raster | 31 | | `ip_y` | `DOUBLE` | | **REQUIRED.** The Y coordinate of the upper left corner of the raster | 32 | | `skew_x` | `DOUBLE` | | **REQUIRED.** The skew factor of the raster in X direction | 33 | | `skew_y` | `DOUBLE` | | **REQUIRED.** The skew factor of the raster in Y direction | 34 | | `width` | `INT32` | | **REQUIRED.** The width of the raster in pixels | 35 | | `height` | `INT32` | | **REQUIRED.** The height of the raster in pixels | 36 | | `bands` | `BYTE_ARRAY` | List | **REQUIRED.** The bands of the raster. See the band data encoding below | 37 | 38 | A raster is one or more grids of cells. All the grids should have `height` rows and `width` columns. The grid cells are represented by the `bands` field. The grids are geo-referenced using an affine transformation that maps the grid coordinates to world coordinates. The coordinate reference system (CRS) of the world coordinates is specified by the `crs` field. For more details, please refer to the [CRS Customization](#crs-customization) section. 39 | 40 | The geo-referencing information is represented by the parameters of an affine transformation (`ip_x`, `ip_y`, `scale_x`, `scale_y`, `skew_x`, `skew_y`). 
This specification only supports affine transformation as geo-referencing transformation, other transformations such as polynomial transformation are not supported. 41 | 42 | The affine transformation is defined as follows: 43 | 44 | ``` 45 | world_x = ip_x + (col + 0.5) * scale_x + (row + 0.5) * skew_x 46 | world_y = ip_y + (col + 0.5) * skew_y + (row + 0.5) * scale_y 47 | ``` 48 | 49 | col = the column number (pixel index) from the left (0 is the first/leftmost column) 50 | row = the row number (pixel index) from the top (0 is the first/topmost row) 51 | 52 | The grid coordinates of a raster is always anchored at the center of grid cells. The translation factor of the affine transformation `ip_x` and `ip_y` also designates the world coordinate of the center of the upper left grid cell. 53 | 54 | This specification supports persisting raster band values in two different ways specified by the `isOffline` flag in the band data encoding. The two options are: 55 | 56 | * **in-db**: The band values are stored in the same Parquet file as the geo-referencing information. 57 | * **out-db**: The band values are stored in files external to the Parquet file. 58 | 59 | ### Band data encoding 60 | 61 | | Name | Type | Meaning | 62 | |-------------------|-------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 63 | | `isOffline` | 1 bit | If true, data is found on external storage, through the path specified in `RASTERDATA`. | 64 | | `hasNodataValue` | 1 bit | If true, the stored nodata value is a true nodata value. Otherwise, the nodata value should be ignored. | 65 | | `isAllNodata` | 1 bit | If true, all values of the band are expected to be nodata values. 
This is a dirty flag; to set it properly, the function `st_bandisnodata` must be called with `TRUE` as the last argument. | 66 | | `isGZIPPed` | 1 bit | If true, the data is compressed using GZIP before being passed to the Parquet compression process. | 67 | | `pixtype` | 4 bits | Pixel type:
0: 1-bit boolean
1: 2-bit unsigned integer
2: 4-bit unsigned integer
3: 8-bit signed integer
4: 8-bit unsigned integer
5: 16-bit signed integer
6: 16-bit unsigned integer
7: 32-bit signed integer
8: 32-bit unsigned integer
10: 32-bit float
11: 64-bit float | 68 | | `nodata` | 1 to 8 bytes (depending on `pixtype` [1]) | Nodata value. | 69 | | `length` | int64 | Length of the `data` byte_array in bytes. | 70 | | `data` | byte_array | Raster band pixel data (see below). | 71 | 72 | ### In-DB pixel data encoding 73 | 74 | This encoding is used when `isOffline` flag is false. 75 | 76 | | Name | Type | Meaning | 77 | |--------------|-----------------|---------| 78 | | `pix[w*h]` | 1 to 8 bytes (depending on `pixtype` [1]) | Pixel values, row after row. `pix[0]` is the upper-left, `pix[w-1]` is the upper-right.

Endianness is specified at the start of WKB. It is implicit up to 8 bits (bit-order is most significant first). | 79 | 80 | ### Out-DB pixel data encoding 81 | 82 | This encoding is used when `isOffline` flag is true. 83 | 84 | | Name | Type | Meaning | 85 | |--------------|--------|-------------------------------------------------------------------------| 86 | | `bandNumber` | int8 | 0-based band number to use from the set available in the external file. | 87 | | `length` | int16 | Length of the `url` string in bytes. | 88 | | `url` | string | The URI of the out-db raster file (e.g., GeoTIFF files). | 89 | 90 | The allowed URI schemes are: 91 | * `file://`: Local file system 92 | * `http://`: HTTP 93 | * `https://`: HTTPS 94 | 95 | --- 96 | 97 | [1] Note: 1, 2, and 4 bit `pixtype`s are still encoded as 1 byte per value. 98 | 99 | ### CRS Customization 100 | 101 | CRS is represented as a string value. Writer and reader implementations are 102 | responsible for serializing and deserializing the CRS, respectively. 103 | 104 | As a convention to maximize the interoperability, custom CRS values can be 105 | specified by a string of the format `type:value`, where `type` is one of 106 | the following values: 107 | 108 | * `srid`: [Spatial reference identifier](https://en.wikipedia.org/wiki/Spatial_reference_system#Identifier), `value` is the SRID itself. 109 | * `projjson`: [PROJJSON](https://proj.org/en/stable/specifications/projjson.html), `value` is the PROJJSON string. 110 | 111 | 112 | ## Metadata 113 | 114 | Parquet Raster files include additional metadata at two levels: 115 | 116 | 1. File metadata indicating things like the version of this specification used 117 | 2. 
Column metadata with additional metadata for each raster column 118 | 119 | ### File metadata 120 | 121 | | Field Name | Type | Description | 122 | | ------------------ | ------ |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 123 | | version | string | **REQUIRED.** The version identifier for the Parquet Raster specification. | 124 | | primary_column | string | **REQUIRED.** The name of the "primary" raster column. In cases where a Parquet file contains multiple raster columns, the primary raster may be used by default in raster operations. | 125 | | columns | object\<string, [Column Metadata](#column-metadata)> | **REQUIRED.** Metadata about raster columns. Each key is the name of a raster column in the table. | 126 | 127 | At this level, additional implementation-specific fields (e.g. library name) MAY be present, and readers should be robust in ignoring those. 128 | 129 | ### Column metadata 130 | 131 | Each raster column in the dataset, although annotated with Parquet `struct` type, MUST be included in the `columns` field above with the following content, keyed by the column name: 132 | 133 | | Field Name | Type | Description | 134 | |------------| ------------ |------------------------------------------------------------------------------------------| 135 | | geometry | string | **REQUIRED.** Name of the geo-reference column to help accelerate spatial data retrieval | 136 | -------------------------------------------------------------------------------- /format-specs/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "GeoParquet", 4 | "description": "Parquet metadata included in the geo field.", 5 | "type": "object", 6 | "required": ["version", "primary_column", "columns"], 7 | "properties": { 8 | "version": { 9 | "type": "string", 10 | "const": 
"1.2.0-dev" 11 | }, 12 | "primary_column": { 13 | "type": "string", 14 | "minLength": 1 15 | }, 16 | "columns": { 17 | "type": "object", 18 | "minProperties": 1, 19 | "patternProperties": { 20 | ".+": { 21 | "type": "object", 22 | "required": ["encoding", "geometry_types"], 23 | "properties": { 24 | "encoding": { 25 | "type": "string", 26 | "enum": [ 27 | "WKB", 28 | "point", 29 | "linestring", 30 | "polygon", 31 | "multipoint", 32 | "multilinestring", 33 | "multipolygon" 34 | ] 35 | }, 36 | "geometry_types": { 37 | "type": "array", 38 | "uniqueItems": true, 39 | "items": { 40 | "type": "string", 41 | "pattern": "^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$" 42 | } 43 | }, 44 | "crs": { 45 | "oneOf": [ 46 | { 47 | "$ref": "https://proj.org/schemas/v0.7/projjson.schema.json" 48 | }, 49 | { 50 | "type": "null" 51 | } 52 | ] 53 | }, 54 | "edges": { 55 | "type": "string", 56 | "enum": ["planar", "spherical"] 57 | }, 58 | "orientation": { 59 | "type": "string", 60 | "const": "counterclockwise" 61 | }, 62 | "bbox": { 63 | "type": "array", 64 | "items": { 65 | "type": "number" 66 | }, 67 | "oneOf": [ 68 | { 69 | "description": "2D bbox consisting of (xmin, ymin, xmax, ymax)", 70 | "minItems": 4, 71 | "maxItems": 4 72 | }, 73 | { 74 | "description": "3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)", 75 | "minItems": 6, 76 | "maxItems": 6 77 | } 78 | ] 79 | }, 80 | "epoch": { 81 | "type": "number" 82 | }, 83 | "covering": { 84 | "type": "object", 85 | "required": [ 86 | "bbox" 87 | ], 88 | "properties": { 89 | "bbox": { 90 | "type": "object", 91 | "required": ["xmin", "xmax", "ymin", "ymax"], 92 | "properties": { 93 | "xmin": { 94 | "type": "array", 95 | "items": [ 96 | { "type": "string", "minLength": 1 }, 97 | { "const": "xmin" } 98 | ], 99 | "minItems": 2, 100 | "maxItems": 2 101 | }, 102 | "xmax": { 103 | "type": "array", 104 | "items": [ 105 | { "type": "string", "minLength": 1 }, 106 | { "const": "xmax" } 107 | ], 108 | "minItems": 2, 109 
| "maxItems": 2 110 | }, 111 | "ymin": { 112 | "type": "array", 113 | "items": [ 114 | { "type": "string", "minLength": 1 }, 115 | { "const": "ymin" } 116 | ], 117 | "minItems": 2, 118 | "maxItems": 2 119 | }, 120 | "ymax": { 121 | "type": "array", 122 | "items": [ 123 | { "type": "string", "minLength": 1 }, 124 | { "const": "ymax" } 125 | ], 126 | "minItems": 2, 127 | "maxItems": 2 128 | } 129 | } 130 | } 131 | } 132 | } 133 | } 134 | } 135 | }, 136 | "additionalProperties": false 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # Helper scripts 2 | 3 | ## Usage 4 | 5 | The scripts in this directory use [uv](https://docs.astral.sh/uv/) for describing dependencies and keeping a consistent lockfile. This lockfile is useful because it ensures every contributor is able to use the exact same dependencies. 6 | 7 | To install uv, follow the [installation guide](https://docs.astral.sh/uv/getting-started/installation/). 8 | 9 | To install from the lockfile: 10 | 11 | ``` 12 | uv sync 13 | ``` 14 | 15 | To run a script, prefix it with `uv run`. For example: 16 | 17 | ``` 18 | uv run python generate_example.py 19 | ``` 20 | 21 | ### Tests 22 | 23 | To run the tests, change into the `scripts` directory and run the following: 24 | 25 | ``` 26 | uv run pytest test_json_schema.py -v 27 | ``` 28 | 29 | ### example.parquet 30 | 31 | The `example.parquet` file in the `examples` directory is generated with the `generate_example.py` script. This script needs to be updated and run any time there are changes to the "geo" file metadata or to the version constant in `schema.json`. 
32 | 33 | To update the `../examples/example.parquet` file, run this from the `scripts` directory: 34 | 35 | ``` 36 | uv run python generate_example.py 37 | ``` 38 | 39 | ### nz-building-outlines to Parquet 40 | 41 | ```bash 42 | uv run python write_nz_building_outline.py \ 43 | --input nz-building-outlines.gpkg \ 44 | --output nz-building-outlines.parquet \ 45 | --compression SNAPPY 46 | ``` 47 | -------------------------------------------------------------------------------- /scripts/generate_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates `example.parquet` using pyarrow by running `python example.py`. 3 | 4 | You can print the metadata with: 5 | 6 | .. code-block:: python 7 | 8 | >>> import json, pprint, pyarrow.parquet as pq 9 | >>> pprint.pprint(json.loads(pq.read_schema("example.parquet").metadata[b"geo"])) 10 | """ 11 | 12 | from collections import OrderedDict 13 | import json 14 | import pathlib 15 | 16 | import pandas as pd 17 | import geopandas 18 | import pyarrow as pa 19 | import pyarrow.parquet as pq 20 | 21 | HERE = pathlib.Path(__file__).parent 22 | 23 | df = pd.read_csv(HERE.parent / "examples" / "example.csv") 24 | df = geopandas.GeoDataFrame( 25 | df, geometry=geopandas.GeoSeries.from_wkt(df.geometry, crs="OGC:CRS84") 26 | ) 27 | 28 | geometry_bbox = df.bounds.rename( 29 | OrderedDict( 30 | [("minx", "xmin"), ("miny", "ymin"), ("maxx", "xmax"), ("maxy", "ymax")] 31 | ), 32 | axis=1, 33 | ) 34 | df["bbox"] = geometry_bbox.to_dict("records") 35 | table = pa.Table.from_pandas(df.head().to_wkb()) 36 | 37 | 38 | def get_version() -> str: 39 | """Read the version const from the schema.json file""" 40 | with open(HERE / "../format-specs/schema.json") as f: 41 | spec_schema = json.load(f) 42 | return spec_schema["properties"]["version"]["const"] 43 | 44 | 45 | metadata = { 46 | "version": get_version(), 47 | "primary_column": "geometry", 48 | "columns": { 49 | "geometry": { 50 | 
"encoding": "WKB", 51 | "geometry_types": ["Polygon", "MultiPolygon"], 52 | "crs": json.loads(df.crs.to_json()), 53 | "edges": "planar", 54 | "bbox": [round(x, 4) for x in df.total_bounds], 55 | "covering": { 56 | "bbox": { 57 | "xmin": ["bbox", "xmin"], 58 | "ymin": ["bbox", "ymin"], 59 | "xmax": ["bbox", "xmax"], 60 | "ymax": ["bbox", "ymax"], 61 | }, 62 | }, 63 | }, 64 | }, 65 | } 66 | 67 | schema = table.schema.with_metadata({"geo": json.dumps(metadata)}) 68 | table = table.cast(schema) 69 | 70 | pq.write_table(table, HERE / "../examples/example.parquet") 71 | -------------------------------------------------------------------------------- /scripts/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "geoparquet-scripts" 3 | version = "0.1.0" 4 | description = "" 5 | authors = [] 6 | license = "MIT" 7 | requires-python = ">=3.10" 8 | 9 | dependencies = [ 10 | "black", 11 | "click", 12 | "geopandas >= 1.0.0", 13 | "isort", 14 | "jsonschema", 15 | "pandas", 16 | "pyarrow >= 20.0.0", 17 | "numpy >= 2.0.0", 18 | "pytest", 19 | "shapely >= 2.1.0", 20 | ] 21 | -------------------------------------------------------------------------------- /scripts/test_json_schema.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test cases (valid and invalid ones) to test the JSON schema. 3 | 4 | Run tests with `pytest test_json_schema.py` 5 | 6 | Test cases are generated on the fly, but if you want to have them written 7 | as .json files to inspect, run `python test_json_schema.py` 8 | 9 | """ 10 | import copy 11 | import json 12 | import pathlib 13 | 14 | from jsonschema.validators import Draft7Validator 15 | 16 | import pytest 17 | 18 | 19 | HERE = pathlib.Path(__file__).parent 20 | SCHEMA_SRC = HERE / ".." 
/ "format-specs" / "schema.json" 21 | SCHEMA = json.loads(SCHEMA_SRC.read_text()) 22 | 23 | 24 | # # Define test cases 25 | 26 | valid_cases = {} 27 | invalid_cases = {} 28 | 29 | 30 | def get_version() -> str: 31 | """Read the version const from the schema.json file""" 32 | with open(HERE / "../format-specs/schema.json") as f: 33 | spec_schema = json.load(f) 34 | return spec_schema["properties"]["version"]["const"] 35 | 36 | 37 | metadata_template = { 38 | "version": get_version(), 39 | "primary_column": "geometry", 40 | "columns": { 41 | "geometry": { 42 | "encoding": "WKB", 43 | "geometry_types": [] 44 | } 45 | }, 46 | } 47 | 48 | 49 | # Minimum required metadata 50 | 51 | metadata = copy.deepcopy(metadata_template) 52 | valid_cases["minimal"] = metadata 53 | 54 | metadata = copy.deepcopy(metadata_template) 55 | metadata.pop("version") 56 | invalid_cases["missing_version"] = metadata 57 | 58 | metadata = copy.deepcopy(metadata_template) 59 | metadata["version"] = "bad-version" 60 | invalid_cases["bad_version"] = metadata 61 | 62 | metadata = copy.deepcopy(metadata_template) 63 | metadata.pop("primary_column") 64 | invalid_cases["missing_primary_column"] = metadata 65 | 66 | metadata = copy.deepcopy(metadata_template) 67 | metadata.pop("columns") 68 | invalid_cases["missing_columns"] = metadata 69 | 70 | metadata = copy.deepcopy(metadata_template) 71 | metadata["columns"] = {} 72 | invalid_cases["missing_columns_entry"] = metadata 73 | 74 | metadata = copy.deepcopy(metadata_template) 75 | metadata["columns"]["geometry"].pop("encoding") 76 | invalid_cases["missing_geometry_encoding"] = metadata 77 | 78 | metadata = copy.deepcopy(metadata_template) 79 | metadata["columns"]["geometry"].pop("geometry_types") 80 | invalid_cases["missing_geometry_type"] = metadata 81 | 82 | metadata = copy.deepcopy(metadata_template) 83 | metadata["custom_key"] = "value" 84 | valid_cases["custom_key"] = metadata 85 | 86 | metadata = copy.deepcopy(metadata_template) 87 | 
metadata["columns"]["geometry"]["custom_key"] = "value" 88 | valid_cases["custom_key_column"] = metadata 89 | 90 | 91 | # Geometry columns 92 | 93 | metadata = copy.deepcopy(metadata_template) 94 | metadata["columns"]["other_geom"] = copy.deepcopy(metadata["columns"]["geometry"]) 95 | valid_cases["geometry_columns_multiple"] = metadata 96 | 97 | metadata = copy.deepcopy(metadata_template) 98 | metadata["columns"]["invalid_column_object"] = "foo" 99 | invalid_cases["geometry_columns_invalid_object"] = metadata 100 | 101 | 102 | # Geometry column name 103 | 104 | metadata = copy.deepcopy(metadata_template) 105 | metadata["primary_column"] = "geom" 106 | metadata["columns"]["geom"] = metadata["columns"].pop("geometry") 107 | valid_cases["geometry_column_name"] = metadata 108 | 109 | metadata = copy.deepcopy(metadata_template) 110 | metadata["primary_column"] = "" 111 | invalid_cases["geometry_column_name_primary_empty"] = metadata 112 | 113 | metadata = copy.deepcopy(metadata_template) 114 | metadata["columns"][""] = metadata["columns"]["geometry"] 115 | invalid_cases["geometry_column_name_empty"] = metadata 116 | 117 | 118 | # Encoding 119 | 120 | metadata = copy.deepcopy(metadata_template) 121 | metadata["columns"]["geometry"]["encoding"] = "WKT" 122 | invalid_cases["encoding"] = metadata 123 | 124 | 125 | # Geometry type - non-empty list 126 | 127 | metadata = copy.deepcopy(metadata_template) 128 | metadata["columns"]["geometry"]["geometry_types"] = ["Point"] 129 | valid_cases["geometry_type_list"] = metadata 130 | 131 | metadata = copy.deepcopy(metadata_template) 132 | metadata["columns"]["geometry"]["geometry_types"] = "Point" 133 | invalid_cases["geometry_type_string"] = metadata 134 | 135 | metadata = copy.deepcopy(metadata_template) 136 | metadata["columns"]["geometry"]["geometry_types"] = ["Curve"] 137 | invalid_cases["geometry_type_nonexistent"] = metadata 138 | 139 | metadata = copy.deepcopy(metadata_template) 140 | 
metadata["columns"]["geometry"]["geometry_types"] = ["Point", "Point"] 141 | invalid_cases["geometry_type_uniqueness"] = metadata 142 | 143 | metadata = copy.deepcopy(metadata_template) 144 | metadata["columns"]["geometry"]["geometry_types"] = ["PointZ"] 145 | invalid_cases["geometry_type_z_missing_space"] = metadata 146 | 147 | 148 | # CRS - explicit null 149 | 150 | metadata = copy.deepcopy(metadata_template) 151 | metadata["columns"]["geometry"]["crs"] = None 152 | valid_cases["crs_null"] = metadata 153 | 154 | metadata = copy.deepcopy(metadata_template) 155 | metadata["columns"]["geometry"]["crs"] = "EPSG:4326" 156 | invalid_cases["crs_string"] = metadata 157 | 158 | 159 | # Bbox 160 | 161 | metadata = copy.deepcopy(metadata_template) 162 | metadata["columns"]["geometry"]["bbox"] = [0, 0, 0, 0] 163 | valid_cases["bbox_4_element"] = metadata 164 | 165 | metadata = copy.deepcopy(metadata_template) 166 | metadata["columns"]["geometry"]["bbox"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 167 | valid_cases["bbox_6_element"] = metadata 168 | 169 | for n in [3, 5, 7]: 170 | metadata = copy.deepcopy(metadata_template) 171 | metadata["columns"]["geometry"]["bbox"] = [0] * n 172 | invalid_cases[f"bbox_{str(n)}_element"] = metadata 173 | 174 | metadata = copy.deepcopy(metadata_template) 175 | metadata["columns"]["geometry"]["bbox"] = ["0", "0", "0", "0"] 176 | invalid_cases["bbox_invalid_type"] = metadata 177 | 178 | 179 | # Orientation 180 | 181 | metadata = copy.deepcopy(metadata_template) 182 | metadata["columns"]["geometry"]["orientation"] = "counterclockwise" 183 | valid_cases["orientation"] = metadata 184 | 185 | metadata = copy.deepcopy(metadata_template) 186 | metadata["columns"]["geometry"]["orientation"] = "clockwise" 187 | invalid_cases["orientation"] = metadata 188 | 189 | # Edges 190 | 191 | metadata = copy.deepcopy(metadata_template) 192 | metadata["columns"]["geometry"]["edges"] = "planar" 193 | valid_cases["edges_planar"] = metadata 194 | 195 | metadata = 
copy.deepcopy(metadata_template) 196 | metadata["columns"]["geometry"]["edges"] = "spherical" 197 | valid_cases["edges_spherical"] = metadata 198 | 199 | metadata = copy.deepcopy(metadata_template) 200 | metadata["columns"]["geometry"]["edges"] = "ellipsoid" 201 | invalid_cases["edges"] = metadata 202 | 203 | # Epoch 204 | 205 | metadata = copy.deepcopy(metadata_template) 206 | metadata["columns"]["geometry"]["epoch"] = 2015.1 207 | valid_cases["epoch"] = metadata 208 | 209 | metadata = copy.deepcopy(metadata_template) 210 | metadata["columns"]["geometry"]["epoch"] = "2015.1" 211 | invalid_cases["epoch_string"] = metadata 212 | 213 | # Geometry Bbox 214 | metadata_covering_template = copy.deepcopy(metadata_template) 215 | metadata_covering_template["columns"]["geometry"]["covering"] = { 216 | "bbox": { 217 | "xmin": ["bbox", "xmin"], 218 | "ymin": ["bbox", "ymin"], 219 | "xmax": ["bbox", "xmax"], 220 | "ymax": ["bbox", "ymax"], 221 | }, 222 | } 223 | 224 | # Allow "any_column.xmin" etc. 
# Test-case construction for the "covering" / "bbox" metadata.
#
# Every case starts from a fresh deep copy of ``metadata_covering_template``
# (defined earlier in this file), applies one mutation, and is registered under
# a unique id in ``valid_cases`` or ``invalid_cases``. The ids become the pytest
# parametrize ids, so the insertion order below is the test order.

# The unmodified template is registered as a valid case.
metadata = copy.deepcopy(metadata_covering_template)
valid_cases["valid_default_bbox"] = metadata

# The first path element (the column name) is something other than "bbox";
# registered as valid.
metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"] = {
    "xmin": ["any_column", "xmin"],
    "ymin": ["any_column", "ymin"],
    "xmax": ["any_column", "xmax"],
    "ymax": ["any_column", "ymax"],
}
valid_cases["valid_but_not_bbox_struct_name"] = metadata

# An empty string as the column name; registered as invalid.
metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"] = {
    "xmin": ["", "xmin"],
    "ymin": ["", "ymin"],
    "xmax": ["", "xmax"],
    "ymax": ["", "ymax"],
}
invalid_cases["empty_column_name"] = metadata

# Path arrays with a length other than 2 (0, 1 and 3 elements).
metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["xmin"] = []
invalid_cases["xmin_array_length_must_be_2_is_0"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["ymax"] = []
invalid_cases["ymax_array_length_must_be_2_is_0"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["ymin"] = ["column"]
invalid_cases["ymin_array_length_must_be_2_is_1"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["xmax"] = ["column"]
invalid_cases["xmax_array_length_must_be_2_is_1"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["xmin"] = ["xmin", "xmin", "xmin"]
invalid_cases["xmin_array_length_must_be_2_is_3"] = metadata

# "covering" present but with its "bbox" key removed entirely.
metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"].pop("bbox")
invalid_cases["empty_geometry_bbox"] = metadata

# "bbox" present but empty (none of xmin/ymin/xmax/ymax).
metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"] = {}
invalid_cases["empty_geometry_bbox_missing_fields"] = metadata

# "bbox" with exactly one of the four fields removed.
metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"].pop("xmin")
invalid_cases["covering_bbox_missing_xmin"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"].pop("ymin")
invalid_cases["covering_bbox_missing_ymin"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"].pop("xmax")
invalid_cases["covering_bbox_missing_xmax"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"].pop("ymax")
invalid_cases["covering_bbox_missing_ymax"] = metadata

# Invalid bbox xmin/xmax/ymin/ymax values
# (the second path element does not match the name of the field it is stored in)
metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["xmin"] = ["bbox", "not_xmin"]
invalid_cases["covering_bbox_invalid_xmin"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["xmax"] = ["bbox", "not_xmax"]
invalid_cases["covering_bbox_invalid_xmax"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["ymin"] = ["bbox", "not_ymin"]
invalid_cases["covering_bbox_invalid_ymin"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["ymax"] = ["bbox", "not_ymax"]
invalid_cases["covering_bbox_invalid_ymax"] = metadata

# Fresh copy for the next case; its mutation and registration follow directly
# after this statement.
metadata = copy.deepcopy(metadata_covering_template)
# Path arrays with a third, superfluous element; registered as invalid.
# ``metadata`` is the fresh deep copy created by the statement directly
# preceding this one.
metadata["columns"]["geometry"]["covering"]["bbox"]["xmin"] = ["bbox", "xmin", "invalid_extra"]
invalid_cases["covering_bbox_extra_xmin_elements"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["xmax"] = ["bbox", "xmax", "invalid_extra"]
invalid_cases["covering_bbox_extra_xmax_elements"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["ymin"] = ["bbox", "ymin", "invalid_extra"]
invalid_cases["covering_bbox_extra_ymin_elements"] = metadata

metadata = copy.deepcopy(metadata_covering_template)
metadata["columns"]["geometry"]["covering"]["bbox"]["ymax"] = ["bbox", "ymax", "invalid_extra"]
invalid_cases["covering_bbox_extra_ymax_elements"] = metadata


# # Tests

@pytest.mark.parametrize(
    "metadata", valid_cases.values(), ids=valid_cases.keys()
)
def test_valid_schema(request, metadata):
    """Assert that ``metadata`` validates against ``SCHEMA`` without errors.

    On failure the AssertionError lists every validation error (JSON path,
    message and, when the failing sub-schema has one, its description)
    together with the offending metadata.
    """
    msgs = []
    for error in Draft7Validator(SCHEMA).iter_errors(metadata):
        msg = f"- {error.json_path}: {error.message}"
        # JSON Schema allows boolean sub-schemas, which carry no description,
        # so only look the key up on mapping schemas.
        if isinstance(error.schema, dict) and "description" in error.schema:
            msg += f". {error.schema['description']}"
        msgs.append(msg)

    if msgs:
        raise AssertionError(
            f"Error while validating '{request.node.callspec.id}':\n"
            + json.dumps({"geo": metadata}, indent=2, sort_keys=True)
            + "\n\nErrors:\n" + "\n".join(msgs)
        )


@pytest.mark.parametrize(
    "metadata", invalid_cases.values(), ids=invalid_cases.keys()
)
def test_invalid_schema(request, metadata):
    """Assert that ``metadata`` produces at least one validation error."""
    errors = Draft7Validator(SCHEMA).iter_errors(metadata)

    # One error is enough; no need to materialise the whole error list.
    if next(iter(errors), None) is None:
        raise AssertionError(
            "This is an invalid GeoParquet file, but no validation error "
            f"occurred for '{request.node.callspec.id}':\n"
            + json.dumps({"geo": metadata}, indent=2, sort_keys=True)
        )


if __name__ == "__main__":
    # When run as a script, dump every case as a JSON file under ./data
    (HERE / "data").mkdir(exist_ok=True)

    def write_metadata_json(metadata, name):
        """Write ``metadata`` wrapped in a "geo" key to data/metadata_<name>.json"""
        with open(HERE / "data" / ("metadata_" + name + ".json"), "w") as f:
            json.dump({"geo": metadata}, f, indent=2, sort_keys=True)

    for case, metadata in valid_cases.items():
        write_metadata_json(metadata, "valid_" + case)

    for case, metadata in invalid_cases.items():
        write_metadata_json(metadata, "invalid_" + case)

# --------------------------------------------------------------------------------
# /scripts/update_example_schemas.py:
# --------------------------------------------------------------------------------
import json
from pathlib import Path
from typing import Any, Dict, Union

import pyarrow.parquet as pq


def find_root_dir() -> Path:
    """Find root geoparquet directory

    This is a more robust approach than assuming __file__ refers to a specific place

    Returns
    -------
    The closest ancestor of this file whose name is "geoparquet".

    Raises
    ------
    ValueError
        If no ancestor directory is named "geoparquet".
    """
    start = Path(__file__).resolve()

    # Walking ``parents`` always terminates at the filesystem root, whatever
    # that root looks like on the current platform (comparing against
    # Path("/") never matches a drive root such as "C:\\").
    for folder in (start, *start.parents):
        if folder.name == "geoparquet":
            return folder

    raise ValueError("Could not find geoparquet folder")


def copy_parquet_schema_metadata_to_json(parquet_file: Path) -> None:
    """Copy Parquet schema metadata to a neighboring JSON file

    The output is written next to ``parquet_file`` as ``<stem>_metadata.json``.
    """
    neighboring_json_file = parquet_file.parent / f"{parquet_file.stem}_metadata.json"
    schema = pq.read_schema(parquet_file)
    # ``schema.metadata`` is None for a file without key/value metadata.
    decoded_meta = decode_dict(schema.metadata or {})

    with open(neighboring_json_file, "w", encoding="utf-8") as f:
        json.dump(decoded_meta, f, indent=2, sort_keys=True)


def decode_dict(d: Dict[Union[bytes, str], Union[bytes, Any]]) -> Dict[str, Any]:
    """Decode binary keys and values to string and dict objects

    Bytes keys/values are decoded to ``str``. String values that look like a
    JSON object (first non-blank character is "{") are parsed with
    ``json.loads``; any other value is passed through unchanged.

    Raises
    ------
    json.JSONDecodeError
        If a value starts with "{" but is not valid JSON.
    """
    new_dict = {}
    for key, val in d.items():
        new_key = key.decode() if isinstance(key, bytes) else key
        new_val = val.decode() if isinstance(val, bytes) else val
        # The signature allows arbitrary values; only strings can hold JSON.
        if isinstance(new_val, str) and new_val.lstrip().startswith("{"):
            new_val = json.loads(new_val)

        new_dict[new_key] = new_val

    return new_dict


def main():
    """Refresh the ``*_metadata.json`` file of every example Parquet file."""
    examples_dir = find_root_dir() / 'examples'
    # Sorted so the files are processed in a reproducible order.
    for parquet_file in sorted(examples_dir.glob("*.parquet")):
        copy_parquet_schema_metadata_to_json(parquet_file)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /scripts/write_nz_building_outline.py:
# --------------------------------------------------------------------------------
import json
import sys
from pathlib import Path
from typing import Any, Dict, List

import click
import geopandas as gpd
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import shapely
from numpy.typing import NDArray
from shapely import GeometryType

AVAILABLE_COMPRESSIONS = ["NONE", "SNAPPY", "GZIP", "BROTLI", "LZ4", "ZSTD"]

ShapelyGeometryArray = NDArray[np.object_]

# GeoParquet spells geometry type names in CamelCase ("MultiPolygon"), whereas
# ``GeometryType(...).name`` yields the upper-case enum name ("MULTIPOLYGON").
# MISSING (-1) is deliberately absent: null geometries contribute no type.
_GEOPARQUET_TYPE_NAMES = {
    GeometryType.POINT: "Point",
    GeometryType.LINESTRING: "LineString",
    # NOTE(review): shapely's WKB writer is documented to serialise linear
    # rings as linestrings, hence the same name -- confirm if rings can occur.
    GeometryType.LINEARRING: "LineString",
    GeometryType.POLYGON: "Polygon",
    GeometryType.MULTIPOINT: "MultiPoint",
    GeometryType.MULTILINESTRING: "MultiLineString",
    GeometryType.MULTIPOLYGON: "MultiPolygon",
    GeometryType.GEOMETRYCOLLECTION: "GeometryCollection",
}


class PathType(click.Path):
    """A Click path argument that returns a pathlib Path, not a string"""

    def convert(self, value, param, ctx):
        """Run click's own path validation, then wrap the result in ``Path``."""
        return Path(super().convert(value, param, ctx))


def parse_to_shapely(df: gpd.GeoDataFrame) -> Dict[str, ShapelyGeometryArray]:
    """Parse to shapely geometry array

    This is split out from _create_metadata so that we don't have to extract the
    geometry arrays twice: once for converting to wkb and another time for
    metadata handling.

    Returns
    -------
    dict mapping the name of every geometry-dtype column to its values.
    """
    geometry_columns: Dict[str, ShapelyGeometryArray] = {}
    for col in df.columns[df.dtypes == "geometry"]:
        geometry_columns[col] = df[col].values

    return geometry_columns


def _create_metadata(
    df: gpd.GeoDataFrame, geometry_columns: Dict[str, ShapelyGeometryArray]
) -> Dict[str, Any]:
    """Create geo metadata dict.

    Parameters
    ----------
    df : GeoDataFrame
    geometry_columns : dict
        Geometry arrays keyed by column name, as returned by parse_to_shapely.

    Returns
    -------
    dict
        Not yet encoded; pass it to encode_metadata before storing it.
    """

    # Construct metadata for each geometry
    column_metadata = {}
    for col, geometry_array in geometry_columns.items():
        geometry_types = get_geometry_types(geometry_array)
        # tolist() yields plain Python floats for JSON serialisation
        bbox = shapely.total_bounds(geometry_array).tolist()

        series = df[col]
        column_metadata[col] = {
            "encoding": "WKB",
            "geometry_types": geometry_types,
            "crs": series.crs.to_json_dict() if series.crs else None,
            # We don't specify orientation for now
            # "orientation"
            "edges": "planar",
            "bbox": bbox,
            # I don't know how to get the epoch from a pyproj CRS, and if it's relevant
            # here
            # "epoch":
        }

    # The spec version is read from the JSON schema. Resolve the path from this
    # file (scripts/ sits next to format-specs/) rather than from the current
    # working directory, so the script can be launched from anywhere.
    schema_path = Path(__file__).resolve().parent.parent / "format-specs" / "schema.json"
    with open(schema_path, encoding="utf-8") as f:
        spec_schema = json.load(f)
        version = spec_schema["properties"]["version"]["const"]

    return {
        "version": version,
        "primary_column": df._geometry_column_name,
        "columns": column_metadata,
        # "creator": {"library": "geopandas", "version": geopandas.__version__},
    }


def get_geometry_types(shapely_geoms: ShapelyGeometryArray) -> List[str]:
    """Return the distinct GeoParquet geometry type names present in an array.

    Names use the GeoParquet spelling ("Polygon", "MultiPolygon", ...), with a
    " Z" suffix for geometries that have a Z coordinate. Missing (null)
    geometries are ignored. The result is sorted so the generated metadata is
    reproducible.
    """
    type_ids = np.asarray(shapely.get_type_id(shapely_geoms)).ravel()
    has_z = np.asarray(shapely.has_z(shapely_geoms)).ravel()

    # Fold (type id, has_z) into a single integer so that one vectorised
    # np.unique finds the distinct combinations.
    codes = np.unique(type_ids * 2 + has_z.astype(type_ids.dtype))

    geom_type_names = set()
    for code in codes.tolist():
        type_id, is_3d = divmod(code, 2)
        base_name = _GEOPARQUET_TYPE_NAMES.get(type_id)
        if base_name is None:
            # type id -1: missing geometry
            continue
        geom_type_names.add(f"{base_name} Z" if is_3d else base_name)

    return sorted(geom_type_names)


def encode_metadata(metadata: Dict) -> bytes:
    """Encode metadata dict to UTF-8 JSON string

    Parameters
    ----------
    metadata : dict

    Returns
    -------
    UTF-8 encoded JSON string
    """
    # Remove unnecessary whitespace in JSON metadata
    # https://stackoverflow.com/a/33233406
    return json.dumps(metadata, separators=(",", ":")).encode("utf-8")


def cast_dtypes(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Note: This is specific to the nz-building-outlines data
    See reference here:
    https://nz-buildings.readthedocs.io/en/latest/published_data.html#table-nz-building-outlines

    The id columns are narrowed to int32 and the date columns parsed to
    datetimes. ``df`` is modified in place and also returned.

    Raises
    ------
    ValueError
        If an id column holds values that do not fit into int32.
    """
    int32_info = np.iinfo(np.int32)
    for id_col in ["building_id", "capture_source_id"]:
        lowest, highest = df[id_col].min(), df[id_col].max()
        # Explicit check instead of ``assert`` so it is not stripped under -O.
        # Written as a negated range test so NaN bounds are rejected too.
        if not (int32_info.min <= lowest and highest <= int32_info.max):
            raise ValueError(
                f"Column '{id_col}' has values outside the int32 range: "
                f"min={lowest}, max={highest}"
            )
        df[id_col] = df[id_col].astype(np.int32)

    for date_col in ["capture_source_from", "capture_source_to", "last_modified"]:
        df[date_col] = pd.to_datetime(df[date_col], format="%Y-%m-%d")

    return df


def geopandas_to_arrow(df: gpd.GeoDataFrame) -> pa.Table:
    """Convert a GeoDataFrame to an Arrow table with WKB geometries.

    Geometry columns are encoded as WKB and the geo metadata is stored under
    the ``b"geo"`` key of the table's schema metadata.
    """
    geometry_columns = parse_to_shapely(df)
    geo_metadata = _create_metadata(df, geometry_columns)

    df = pd.DataFrame(df)
    for col, geometry_array in geometry_columns.items():
        df[col] = shapely.to_wkb(geometry_array)

    table = pa.Table.from_pandas(df, preserve_index=False)

    # Copy the existing schema metadata (it may be None) before adding "geo"
    metadata = dict(table.schema.metadata or {})
    metadata[b"geo"] = encode_metadata(geo_metadata)
    return table.replace_schema_metadata(metadata)


@click.command()
@click.option(
    "-i",
    "--input",
    type=PathType(exists=True, file_okay=True, dir_okay=False, readable=True),
    help="Path to input nz-building-outlines.gpkg",
    required=True,
)
@click.option(
    "--layer-name",
    type=str,
    required=False,
    help="Name of layer within GeoPackage",
    show_default=True,
    default="nz_building_outlines",
)
@click.option(
    "-o",
    "--output",
    type=PathType(file_okay=True, dir_okay=False, writable=True),
    help="Path to output Parquet file.",
    required=True,
)
@click.option(
    "--compression",
    type=click.Choice(AVAILABLE_COMPRESSIONS, case_sensitive=False),
    default="SNAPPY",
    help="Compression codec to use when writing to Parquet.",
    show_default=True,
)
def main(input: Path, layer_name: str, output: Path, compression: str):
    """Read a GeoPackage layer and write it as a Parquet file with geo metadata.

    Progress messages go to stderr.
    """
    # NOTE: ``input`` shadows the builtin, but the name is dictated by the
    # ``--input`` option above.
    print("Starting to read geopackage", file=sys.stderr)
    df = gpd.read_file(input, layer=layer_name)
    print("Finished reading geopackage", file=sys.stderr)
    df = cast_dtypes(df)

    print("Starting conversion to Arrow", file=sys.stderr)
    arrow_table = geopandas_to_arrow(df)
    print("Finished conversion to Arrow", file=sys.stderr)
    print("Starting write to Parquet", file=sys.stderr)
    pq.write_table(arrow_table, output, compression=compression)
    print("Finished write to Parquet", file=sys.stderr)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /test_data/data-linestring-encoding_native.parquet:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-linestring-encoding_native.parquet -------------------------------------------------------------------------------- /test_data/data-linestring-encoding_wkb.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-linestring-encoding_wkb.parquet -------------------------------------------------------------------------------- /test_data/data-linestring-wkt.csv: -------------------------------------------------------------------------------- 1 | "col","geometry" 2 | 0,"LINESTRING (30 10, 10 30, 40 40)" 3 | 1,"LINESTRING EMPTY" 4 | 2, 5 | -------------------------------------------------------------------------------- /test_data/data-multilinestring-encoding_native.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-multilinestring-encoding_native.parquet -------------------------------------------------------------------------------- /test_data/data-multilinestring-encoding_wkb.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-multilinestring-encoding_wkb.parquet -------------------------------------------------------------------------------- /test_data/data-multilinestring-wkt.csv: -------------------------------------------------------------------------------- 1 | "col","geometry" 2 | 0,"MULTILINESTRING ((30 10, 10 30, 40 40))" 3 | 1,"MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))" 4 | 2,"MULTILINESTRING EMPTY" 5 | 3, 6 | 
-------------------------------------------------------------------------------- /test_data/data-multipoint-encoding_native.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-multipoint-encoding_native.parquet -------------------------------------------------------------------------------- /test_data/data-multipoint-encoding_wkb.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-multipoint-encoding_wkb.parquet -------------------------------------------------------------------------------- /test_data/data-multipoint-wkt.csv: -------------------------------------------------------------------------------- 1 | "col","geometry" 2 | 0,"MULTIPOINT ((30 10))" 3 | 1,"MULTIPOINT ((10 40), (40 30), (20 20), (30 10))" 4 | 2,"MULTIPOINT EMPTY" 5 | 3, 6 | -------------------------------------------------------------------------------- /test_data/data-multipolygon-encoding_native.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-multipolygon-encoding_native.parquet -------------------------------------------------------------------------------- /test_data/data-multipolygon-encoding_wkb.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-multipolygon-encoding_wkb.parquet -------------------------------------------------------------------------------- /test_data/data-multipolygon-wkt.csv: -------------------------------------------------------------------------------- 1 | 
"col","geometry" 2 | 0,"MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))" 3 | 1,"MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))" 4 | 2,"MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))" 5 | 3,"MULTIPOLYGON EMPTY" 6 | 4, 7 | -------------------------------------------------------------------------------- /test_data/data-point-encoding_native.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-point-encoding_native.parquet -------------------------------------------------------------------------------- /test_data/data-point-encoding_wkb.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-point-encoding_wkb.parquet -------------------------------------------------------------------------------- /test_data/data-point-wkt.csv: -------------------------------------------------------------------------------- 1 | "col","geometry" 2 | 0,"POINT (30 10)" 3 | 1,"POINT EMPTY" 4 | 2, 5 | 3,"POINT (40 40)" 6 | -------------------------------------------------------------------------------- /test_data/data-polygon-encoding_native.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-polygon-encoding_native.parquet -------------------------------------------------------------------------------- /test_data/data-polygon-encoding_wkb.parquet: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/opengeospatial/geoparquet/d727b4cd568651911860fec013982a06c353b9a0/test_data/data-polygon-encoding_wkb.parquet
# --------------------------------------------------------------------------------
# /test_data/data-polygon-wkt.csv:
# --------------------------------------------------------------------------------
# "col","geometry"
# 0,"POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"
# 1,"POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))"
# 2,"POLYGON EMPTY"
# 3,
#
# --------------------------------------------------------------------------------
# /test_data/generate_test_data.py:
# --------------------------------------------------------------------------------
"""
Generates the test data files using pyarrow by running
`python generate_test_data.py`.

For every geometry type three files are written next to this script: a CSV
with WKT, a Parquet file with WKB encoding and a Parquet file with the native
(GeoArrow-based) encoding.

You can print the metadata of a written file with:

.. code-block:: python

   >>> import json, pprint, pyarrow.parquet as pq
   >>> pprint.pprint(json.loads(pq.read_schema("data-point-encoding_wkb.parquet").metadata[b"geo"]))
"""
import json
import pathlib
import copy

import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
from pyarrow.csv import write_csv

from shapely import from_wkt, to_wkb


HERE = pathlib.Path(__file__).parent


# Base "geo" metadata; copied and specialised per written file.
metadata_template = {
    "version": "1.1.0",
    "primary_column": "geometry",
    "columns": {
        "geometry": {
            "encoding": "WKB",
            "geometry_types": [],
        },
    },
}


## Various geometry types with WKB and native (GeoArrow-based) encodings

def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type):
    """Write the CSV, WKB-Parquet and native-Parquet file for one geometry type.

    ``geometries_wkt`` holds the WKT strings (or None), ``geometries_geoarrow``
    the same geometries as an Arrow array, and ``geometry_type`` the type name
    that goes into the metadata; its lower-cased form is used in the file names
    and as the native encoding name.
    """
    type_slug = geometry_type.lower()
    file_stem = f"data-{type_slug}"
    row_ids = range(len(geometries_wkt))

    # WKT, as plain CSV
    wkt_table = pa.table({"col": row_ids, "geometry": geometries_wkt})
    write_csv(wkt_table, HERE / f"{file_stem}-wkt.csv")

    # WKB encoding
    geo_metadata = copy.deepcopy(metadata_template)
    geo_metadata["columns"]["geometry"]["geometry_types"] = [geometry_type]
    wkb_table = pa.table(
        {"col": row_ids, "geometry": to_wkb(from_wkt(geometries_wkt))}
    ).replace_schema_metadata({"geo": json.dumps(geo_metadata)})
    pq.write_table(wkb_table, HERE / f"{file_stem}-encoding_wkb.parquet")

    # native (geoarrow) encoding: same metadata, only the encoding differs
    geo_metadata["columns"]["geometry"]["encoding"] = type_slug
    native_table = pa.table(
        {"col": row_ids, "geometry": geometries_geoarrow}
    ).replace_schema_metadata({"geo": json.dumps(geo_metadata)})
    pq.write_table(native_table, HERE / f"{file_stem}-encoding_native.parquet")


# point

geometries_wkt = [
    "POINT (30 10)",
    "POINT EMPTY",
    None,
    "POINT (40 40)",
]

point_type = pa.struct(
    [
        pa.field("x", pa.float64(), nullable=False),
        pa.field("y", pa.float64(), nullable=False),
    ]
)
# Both the empty point and the masked (null) row carry NaN coordinates;
# only the mask marks the third row as null.
geometries = pa.array(
    [
        (30, 10),
        (float("nan"), float("nan")),
        (float("nan"), float("nan")),
        (40, 40),
    ],
    mask=np.array([False, False, True, False]),
    type=point_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="Point")

# linestring

geometries_wkt = [
    "LINESTRING (30 10, 10 30, 40 40)",
    "LINESTRING EMPTY",
    None,
]

linestring_type = pa.list_(pa.field("vertices", point_type, nullable=False))
# The empty linestring and the masked (null) row are both written as [].
geometries = pa.array(
    [
        [(30, 10), (10, 30), (40, 40)],
        [],
        [],
    ],
    mask=np.array([False, False, True]),
    type=linestring_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="LineString")

# polygon

geometries_wkt = [
    "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
    "POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))",
    "POLYGON EMPTY",
    None,
]

# Polygon: a list of rings, each ring a list of vertices.
_ring_type = pa.list_(pa.field("vertices", point_type, nullable=False))
polygon_type = pa.list_(pa.field("rings", _ring_type, nullable=False))
# Rows line up with the polygon WKT list defined just before this point;
# the empty polygon and the masked (null) row are both written as [].
geometries = pa.array(
    [
        [
            [(30, 10), (40, 40), (20, 40), (10, 20), (30, 10)],
        ],
        [
            [(35, 10), (45, 45), (15, 40), (10, 20), (35, 10)],
            [(20, 30), (35, 35), (30, 20), (20, 30)],
        ],
        [],
        [],
    ],
    mask=np.array([False, False, False, True]),
    type=polygon_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="Polygon")

# multipoint

geometries_wkt = [
    "MULTIPOINT ((30 10))",
    "MULTIPOINT ((10 40), (40 30), (20 20), (30 10))",
    "MULTIPOINT EMPTY",
    None,
]

multipoint_type = pa.list_(pa.field("points", point_type, nullable=False))
geometries = pa.array(
    [
        [(30, 10)],
        [(10, 40), (40, 30), (20, 20), (30, 10)],
        [],
        [],
    ],
    mask=np.array([False, False, False, True]),
    type=multipoint_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="MultiPoint")

# multilinestring

geometries_wkt = [
    "MULTILINESTRING ((30 10, 10 30, 40 40))",
    "MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))",
    "MULTILINESTRING EMPTY",
    None,
]

multilinestring_type = pa.list_(
    pa.field("linestrings", linestring_type, nullable=False)
)
geometries = pa.array(
    [
        [
            [(30, 10), (10, 30), (40, 40)],
        ],
        [
            [(10, 10), (20, 20), (10, 40)],
            [(40, 40), (30, 30), (40, 20), (30, 10)],
        ],
        [],
        [],
    ],
    mask=np.array([False, False, False, True]),
    type=multilinestring_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="MultiLineString")

# multipolygon

geometries_wkt = [
    "MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))",
    "MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))",
    "MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))",
    "MULTIPOLYGON EMPTY",
    None,
]

multipolygon_type = pa.list_(pa.field("polygons", polygon_type, nullable=False))
# Nesting: multipolygon -> polygons -> rings -> vertices.
geometries = pa.array(
    [
        [
            [[(30, 10), (40, 40), (20, 40), (10, 20), (30, 10)]],
        ],
        [
            [[(30, 20), (45, 40), (10, 40), (30, 20)]],
            [[(15, 5), (40, 10), (10, 20), (5, 10), (15, 5)]],
        ],
        [
            [[(40, 40), (20, 45), (45, 30), (40, 40)]],
            [
                [(20, 35), (10, 30), (10, 10), (30, 5), (45, 20), (20, 35)],
                [(30, 20), (20, 15), (20, 25), (30, 20)],
            ],
        ],
        [],
        [],
    ],
    mask=np.array([False, False, False, False, True]),
    type=multipolygon_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="MultiPolygon")

# --------------------------------------------------------------------------------