├── .clang-format ├── .editorconfig ├── .github └── workflows │ ├── MainDistributionPipeline.yml │ └── dev.yaml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── CMakeUserPresets.json ├── LICENSE ├── Makefile ├── README.md ├── data ├── README.md ├── cities.tsv └── countries.tsv ├── docs ├── README.md ├── UPDATING.md ├── duckdoc.py ├── function-reference.md ├── function-reference.md.jinja └── requirements.txt ├── extension_config.cmake ├── function-reference.md ├── scripts ├── extension-upload.sh └── setup-custom-toolchain.sh ├── src ├── function_builder.cpp ├── geography_extension.cpp ├── include │ ├── function_builder.hpp │ ├── geography_extension.hpp │ ├── global_options.hpp │ ├── s2_cell_ops.hpp │ ├── s2_data.hpp │ ├── s2_data_static.hpp │ ├── s2_dependencies.hpp │ ├── s2_functions_io.hpp │ ├── s2_geography_ops.hpp │ ├── s2_geography_serde.hpp │ └── s2_types.hpp ├── s2_accessors.cpp ├── s2_binary_index_ops.cpp ├── s2_bounds.cpp ├── s2_cell_ops.cpp ├── s2_data.cpp ├── s2_dependencies.cpp ├── s2_functions_io.cpp ├── s2_geoarrow.cpp └── s2_types.cpp ├── test ├── README.md ├── python │ ├── conftest.py │ ├── requirements.txt │ └── test_geoarrow.py └── sql │ ├── accessors.test │ ├── binary_index_ops.test │ ├── bounds.test │ ├── cell_ops.test │ ├── data.test │ ├── extension.test │ ├── functions_io.test │ └── geoarrow.test ├── test_local.sh ├── vcpkg.json └── vcpkg_ports └── abseil ├── portfile.cmake └── vcpkg.json /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | ColumnLimit: 90 4 | DerivePointerAlignment: false 5 | IncludeBlocks: Preserve 6 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | duckdb/.editorconfig -------------------------------------------------------------------------------- /.github/workflows/MainDistributionPipeline.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension 3 | # 4 | name: Main Extension Distribution Pipeline 5 | on: 6 | push: 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} 12 | cancel-in-progress: true 13 | 14 | jobs: 15 | duckdb-next-build: 16 | name: Build extension binaries 17 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main 18 | with: 19 | duckdb_version: main 20 | ci_tools_version: main 21 | extension_name: geography 22 | 23 | duckdb-stable-build: 24 | name: Build extension binaries 25 | uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.0 26 | with: 27 | duckdb_version: v1.2.0 28 | ci_tools_version: v1.2.0 29 | extension_name: geography 30 | 31 | # Not currently working, and also we don't currently deploy anywhere 32 | # duckdb-stable-deploy: 33 | # name: Deploy extension binaries 34 | # needs: duckdb-stable-build 35 | # uses: ./.github/workflows/_extension_deploy.yml 36 | # secrets: inherit 37 | # with: 38 | # duckdb_version: v1.1.3 39 | # extension_name: geography 40 | # deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} 41 | 
-------------------------------------------------------------------------------- /.github/workflows/dev.yaml: -------------------------------------------------------------------------------- 1 | name: dev 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | pre-commit: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | persist-credentials: false 22 | - uses: actions/setup-python@v5 23 | with: 24 | python-version: '3.x' 25 | - name: pre-commit (cache) 26 | uses: actions/cache@v4 27 | with: 28 | path: ~/.cache/pre-commit 29 | key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 30 | - name: pre-commit (--all-files) 31 | run: | 32 | python -m pip install pre-commit 33 | pre-commit run --show-diff-on-failure --color=always --all-files 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .idea 3 | cmake-build-debug 4 | duckdb_unittest_tempdir/ 5 | .DS_Store 6 | testext 7 | test/python/__pycache__/ 8 | .Rhistory 9 | .cache 10 | .vscode/ 11 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "duckdb"] 2 | path = duckdb 3 | url = https://github.com/duckdb/duckdb 4 | branch = main 5 | [submodule "extension-ci-tools"] 6 | path = extension-ci-tools 7 | url = https://github.com/duckdb/extension-ci-tools 8 | branch = main 9 | [submodule "s2geography"] 10 | path = s2geography 11 | url = https://github.com/paleolimbot/s2geography.git 12 | branch = main 13 | [submodule "s2geometry"] 14 | path = s2geometry 15 | url = https://github.com/google/s2geometry.git 16 | branch = main 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/pre-commit/mirrors-clang-format 9 | rev: v19.1.4 10 | hooks: 11 | - id: clang-format 12 | types_or: [c, c++] 13 | - repo: https://github.com/cheshirekow/cmake-format-precommit 14 | rev: v0.6.13 15 | hooks: 16 | - id: cmake-format 17 | args: [--in-place] 18 | - repo: https://github.com/codespell-project/codespell 19 | rev: v2.2.5 20 | hooks: 21 | - id: codespell 22 | types_or: [rst, markdown, c, c++] 23 | additional_dependencies: [tomli] 24 | 25 | exclude: "^src/vendor" 26 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | 3 | include(FetchContent) 4 | 5 | # Set extension name here 6 | set(TARGET_NAME geography) 7 | 8 | # Required for S2 9 | set(CMAKE_CXX_STANDARD 17) 10 | 11 | # DuckDB's extension distribution supports vcpkg. As such, dependencies can be 12 | # added in ./vcpkg.json and then used in cmake with find_package. Feel free to 13 | # remove or replace with other dependencies. Note that it should also be removed 14 | # from vcpkg.json to prevent needlessly installing it.. 
15 | find_package(OpenSSL REQUIRED) 16 | find_package(absl REQUIRED) 17 | 18 | set(NANOARROW_NAMESPACE DuckDBGeography) 19 | FetchContent_Declare( 20 | nanoarrow 21 | URL https://github.com/apache/arrow-nanoarrow/archive/refs/tags/apache-arrow-nanoarrow-0.6.0.zip 22 | URL_HASH 23 | SHA256=73b3235453902c9e92b5e942683d02d764a2aea1f999c03bf4a747c90df2f505) 24 | 25 | set(GEOARROW_NAMESPACE DuckDBGeography) 26 | FetchContent_Declare( 27 | geoarrow 28 | URL https://github.com/geoarrow/geoarrow-c/archive/4a755ea6a09b8e842a9ccaef6ef05e6e3870f973.zip 29 | URL_HASH 30 | SHA256=7d9caab1b55cc116b52b4eae34f392bc323a6f2e8afdd9c2a586957b99020f90) 31 | 32 | FetchContent_MakeAvailable(nanoarrow) 33 | FetchContent_MakeAvailable(geoarrow) 34 | 35 | # S2's CMake is pretty awful so we just build the library ourselves 36 | add_library( 37 | s2 38 | s2geometry/src/s2/encoded_s2cell_id_vector.cc 39 | s2geometry/src/s2/encoded_s2point_vector.cc 40 | s2geometry/src/s2/encoded_s2shape_index.cc 41 | s2geometry/src/s2/encoded_string_vector.cc 42 | s2geometry/src/s2/id_set_lexicon.cc 43 | s2geometry/src/s2/mutable_s2shape_index.cc 44 | s2geometry/src/s2/r2rect.cc 45 | s2geometry/src/s2/s1angle.cc 46 | s2geometry/src/s2/s1chord_angle.cc 47 | s2geometry/src/s2/s1interval.cc 48 | s2geometry/src/s2/s2boolean_operation.cc 49 | s2geometry/src/s2/s2buffer_operation.cc 50 | s2geometry/src/s2/s2builder.cc 51 | s2geometry/src/s2/s2builder_graph.cc 52 | s2geometry/src/s2/s2builderutil_closed_set_normalizer.cc 53 | s2geometry/src/s2/s2builderutil_find_polygon_degeneracies.cc 54 | s2geometry/src/s2/s2builderutil_get_snapped_winding_delta.cc 55 | s2geometry/src/s2/s2builderutil_lax_polygon_layer.cc 56 | s2geometry/src/s2/s2builderutil_lax_polyline_layer.cc 57 | s2geometry/src/s2/s2builderutil_s2point_vector_layer.cc 58 | s2geometry/src/s2/s2builderutil_s2polygon_layer.cc 59 | s2geometry/src/s2/s2builderutil_s2polyline_layer.cc 60 | s2geometry/src/s2/s2builderutil_s2polyline_vector_layer.cc 61 | s2geometry/src/s2/s2builderutil_snap_functions.cc 62 | s2geometry/src/s2/s2cap.cc 63 | s2geometry/src/s2/s2cell.cc 64 | s2geometry/src/s2/s2cell_id.cc 65 | s2geometry/src/s2/s2cell_index.cc 66 | s2geometry/src/s2/s2cell_union.cc 67 | s2geometry/src/s2/s2centroids.cc 68 | s2geometry/src/s2/s2closest_cell_query.cc 69 | s2geometry/src/s2/s2closest_edge_query.cc 70 | s2geometry/src/s2/s2closest_point_query.cc 71 | s2geometry/src/s2/s2contains_vertex_query.cc 72 | s2geometry/src/s2/s2convex_hull_query.cc 73 | s2geometry/src/s2/s2coords.cc 74 | s2geometry/src/s2/s2crossing_edge_query.cc 75 | s2geometry/src/s2/s2debug.cc 76 | s2geometry/src/s2/s2earth.cc 77 | s2geometry/src/s2/s2edge_clipping.cc 78 | s2geometry/src/s2/s2edge_crosser.cc 79 | s2geometry/src/s2/s2edge_crossings.cc 80 | s2geometry/src/s2/s2edge_distances.cc 81 | s2geometry/src/s2/s2edge_tessellator.cc 82 | s2geometry/src/s2/s2error.cc 83 | s2geometry/src/s2/s2furthest_edge_query.cc 84 | s2geometry/src/s2/s2hausdorff_distance_query.cc 85 | s2geometry/src/s2/s2latlng.cc 86 | s2geometry/src/s2/s2latlng_rect.cc 87 | s2geometry/src/s2/s2latlng_rect_bounder.cc 88 | s2geometry/src/s2/s2lax_loop_shape.cc 89 | s2geometry/src/s2/s2lax_polygon_shape.cc 90 | s2geometry/src/s2/s2lax_polyline_shape.cc 91 | s2geometry/src/s2/s2loop.cc 92 | s2geometry/src/s2/s2loop_measures.cc 93 | s2geometry/src/s2/s2measures.cc 94 | s2geometry/src/s2/s2memory_tracker.cc 95 | s2geometry/src/s2/s2metrics.cc 96 | s2geometry/src/s2/s2max_distance_targets.cc 97 | s2geometry/src/s2/s2min_distance_targets.cc 98 | 
s2geometry/src/s2/s2padded_cell.cc 99 | s2geometry/src/s2/s2point_compression.cc 100 | s2geometry/src/s2/s2point_region.cc 101 | s2geometry/src/s2/s2pointutil.cc 102 | s2geometry/src/s2/s2polygon.cc 103 | s2geometry/src/s2/s2polyline.cc 104 | s2geometry/src/s2/s2polyline_alignment.cc 105 | s2geometry/src/s2/s2polyline_measures.cc 106 | s2geometry/src/s2/s2polyline_simplifier.cc 107 | s2geometry/src/s2/s2predicates.cc 108 | s2geometry/src/s2/s2projections.cc 109 | s2geometry/src/s2/s2r2rect.cc 110 | s2geometry/src/s2/s2region.cc 111 | s2geometry/src/s2/s2region_term_indexer.cc 112 | s2geometry/src/s2/s2region_coverer.cc 113 | s2geometry/src/s2/s2region_intersection.cc 114 | s2geometry/src/s2/s2region_union.cc 115 | s2geometry/src/s2/s2shape_index.cc 116 | s2geometry/src/s2/s2shape_index_buffered_region.cc 117 | s2geometry/src/s2/s2shape_index_measures.cc 118 | s2geometry/src/s2/s2shape_measures.cc 119 | s2geometry/src/s2/s2shape_nesting_query.cc 120 | s2geometry/src/s2/s2shapeutil_build_polygon_boundaries.cc 121 | s2geometry/src/s2/s2shapeutil_coding.cc 122 | s2geometry/src/s2/s2shapeutil_contains_brute_force.cc 123 | s2geometry/src/s2/s2shapeutil_conversion.cc 124 | s2geometry/src/s2/s2shapeutil_edge_iterator.cc 125 | s2geometry/src/s2/s2shapeutil_get_reference_point.cc 126 | s2geometry/src/s2/s2shapeutil_visit_crossing_edge_pairs.cc 127 | s2geometry/src/s2/s2text_format.cc 128 | s2geometry/src/s2/s2wedge_relations.cc 129 | s2geometry/src/s2/s2winding_operation.cc 130 | s2geometry/src/s2/util/bits/bit-interleave.cc 131 | s2geometry/src/s2/util/coding/coder.cc 132 | s2geometry/src/s2/util/coding/varint.cc 133 | s2geometry/src/s2/util/math/exactfloat/exactfloat.cc 134 | s2geometry/src/s2/util/math/mathutil.cc 135 | s2geometry/src/s2/util/units/length-units.cc) 136 | 137 | if(WIN32 AND NOT MSVC) 138 | target_compile_definitions(s2 PUBLIC _USE_MATH_DEFINES) 139 | set(S2_EXTRA_OPENSSL_LIBS crypt32 z ws2_32 gdi32 crypt32) 140 | elseif(MSVC) 141 | target_compile_definitions(s2 PUBLIC NOMINMAX _USE_MATH_DEFINES) 142 | target_compile_options(s2 PUBLIC /J) 143 | else() 144 | # target_compile_definitions(s2 PRIVATE -Wno-attributes 145 | # -Wno-deprecated-declarations "-Wno-comment" -Wno-pedantic ) 146 | endif() 147 | 148 | # HACK ALERT *** HACK ALERT *** HACK ALERT s2geometry doesn't bother to 149 | # namespace any of their symbols, and neither does libfsst. Both of them define 150 | # an "Encoder" :facepalm:. This seems to work. 
151 | target_compile_definitions(s2 PUBLIC Encoder=S2Encoder) 152 | 153 | target_include_directories( 154 | s2 PUBLIC $) 155 | target_link_libraries( 156 | s2 157 | ${OPENSSL_LIBRARIES} 158 | absl::base 159 | absl::btree 160 | absl::check 161 | absl::config 162 | absl::core_headers 163 | absl::dynamic_annotations 164 | absl::endian 165 | absl::fixed_array 166 | absl::flags 167 | absl::flat_hash_map 168 | absl::flat_hash_set 169 | absl::hash 170 | absl::inlined_vector 171 | absl::int128 172 | absl::log 173 | absl::log_severity 174 | absl::memory 175 | absl::span 176 | absl::status 177 | absl::str_format 178 | absl::strings 179 | absl::type_traits 180 | absl::utility 181 | ${CMAKE_THREAD_LIBS_INIT}) 182 | 183 | # Build s2geography 184 | add_library(s2::s2 ALIAS s2) 185 | set(S2_VERSION_MAJOR 0) 186 | set(S2_VERSION_MINOR 11) 187 | set(S2_VERSION_PATCH 1) 188 | set(BUILD_SHARED_LIBS OFF) 189 | add_subdirectory(s2geography) 190 | target_compile_definitions( 191 | s2geography PRIVATE -DGEOARROW_FAST_FLOAT_NAMESPACE=duckdb_fast_float) 192 | 193 | set(EXTENSION_NAME ${TARGET_NAME}_extension) 194 | set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) 195 | 196 | project(${TARGET_NAME}) 197 | include_directories(src/include) 198 | 199 | set(EXTENSION_SOURCES 200 | src/geography_extension.cpp 201 | src/function_builder.cpp 202 | src/s2_dependencies.cpp 203 | src/s2_types.cpp 204 | src/s2_cell_ops.cpp 205 | src/s2_functions_io.cpp 206 | src/s2_binary_index_ops.cpp 207 | src/s2_data.cpp 208 | src/s2_accessors.cpp 209 | src/s2_bounds.cpp 210 | src/s2_geoarrow.cpp) 211 | 212 | # Workaround for difference between v1.1.3 and main with respect to 213 | # FunctionEntry fields 214 | if(DUCKDB_VERSION STREQUAL "v1.1.3") 215 | add_definitions(-DDUCKDB_FUNC_ENTRY_HAS_METADATA=1) 216 | endif() 217 | 218 | build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) 219 | build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) 220 | 221 | # Link OpenSSL in both the static library as the loadable extension 222 | target_link_libraries( 223 | ${EXTENSION_NAME} 224 | s2geography 225 | s2 226 | OpenSSL::SSL 227 | OpenSSL::Crypto 228 | ${S2_EXTRA_OPENSSL_LIBS} 229 | geoarrow::geoarrow 230 | nanoarrow::nanoarrow) 231 | target_link_libraries( 232 | ${LOADABLE_EXTENSION_NAME} 233 | s2geography 234 | s2 235 | OpenSSL::SSL 236 | OpenSSL::Crypto 237 | ${S2_EXTRA_OPENSSL_LIBS} 238 | geoarrow::geoarrow 239 | nanoarrow::nanoarrow) 240 | 241 | install( 242 | TARGETS ${EXTENSION_NAME} s2geography s2 243 | EXPORT "${DUCKDB_EXPORT_SET}" 244 | LIBRARY DESTINATION "${INSTALL_LIB_DIR}" 245 | ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") 246 | -------------------------------------------------------------------------------- /CMakeUserPresets.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 3, 3 | "cmakeMinimumRequired": { 4 | "major": 3, 5 | "minor": 21, 6 | "patch": 0 7 | }, 8 | "configurePresets": [ 9 | { 10 | "name": "extension", 11 | "displayName": "Extension", 12 | "generator": "Ninja", 13 | "binaryDir": "${sourceDir}/../build", 14 | "cacheVariables": { 15 | "CMAKE_BUILD_TYPE": "Release", 16 | "EXTENSION_STATIC_BUILD": "1", 17 | "DUCKDB_EXTENSION_CONFIGS": "${sourceDir}/../extension_config.cmake" 18 | } 19 | }, 20 | { 21 | "name": "extension_vcpkg", 22 | "displayName": "Extension (using vcpkg)", 23 | "inherits": ["extension_debug"], 24 | "cacheVariables": { 25 | "CMAKE_TOOLCHAIN_FILE": "/$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake", 26 | 
"VCPKG_MANIFEST_DIR": "${sourceDir}/..", 27 | "VCPKG_BUILD": "1" 28 | } 29 | }, 30 | { 31 | "name": "extension_debug", 32 | "displayName": "Extension Debug", 33 | "generator": "Ninja", 34 | "binaryDir": "${sourceDir}/../build", 35 | "cacheVariables": { 36 | "CMAKE_BUILD_TYPE": "Debug", 37 | "ENABLE_SANITIZER": "OFF", 38 | "ENABLE_UBSAN": "OFF" 39 | } 40 | }, 41 | { 42 | "name": "extension_debug_vcpkg", 43 | "displayName": "Extension Debug (using vcpkg)", 44 | "inherits": ["extension_debug"], 45 | "cacheVariables": { 46 | "CMAKE_TOOLCHAIN_FILE": "/$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake", 47 | "VCPKG_MANIFEST_DIR": "${sourceDir}/..", 48 | "VCPKG_BUILD": "1" 49 | } 50 | } 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018-2024 Stichting DuckDB Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) 2 | 3 | # Configuration of extension 4 | EXT_NAME=geography 5 | EXT_CONFIG=${PROJ_DIR}extension_config.cmake 6 | 7 | # Include the Makefile from extension-ci-tools 8 | include extension-ci-tools/makefiles/duckdb_extension.Makefile 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DuckDB Geography 2 | 3 | This repository is based on https://github.com/duckdb/extension-template, check it out if you want to build and ship your own DuckDB extension. 4 | 5 | --- 6 | 7 | This extension, geography, allows you leverage [Google's s2geometry library](https://github.com/google/s2geometry) via the [s2geography wrapper library](https://github.com/paleolimbot/s2geography) that also powers S2 integration as an [R package](https://r-spatial.github.io/s2) and a [Python library](https://github.com/benbovy/spherely). It is preliminary and not currently published as a community extension. 8 | 9 | In general, the functions are the same as those implemented in the [spatial extension](https://duckdb.org/docs/extensions/spatial/functions.html) except they are prefixed with `s2_` instead of `st_`. See [the function reference](docs/function-reference.md) for a complete list with documentation. 
10 | 11 | ``` 12 | LOAD geography; 13 | 14 | D CREATE TABLE countries as SELECT name, s2_prepare(geog) as geog FROM s2_data_countries(); 15 | D SELECT 16 | countries.name as country, cities.name as city, cities.geog as geog 17 | FROM countries 18 | INNER JOIN s2_data_cities() AS cities 19 | ON s2_intersects(countries.geog, cities.geog); 20 | 21 | ┌──────────────────────────┬──────────────────┬────────────────────────────────────────────────┐ 22 | │ country │ city │ geog │ 23 | │ varchar │ varchar │ geography │ 24 | ├──────────────────────────┼──────────────────┼────────────────────────────────────────────────┤ 25 | │ Afghanistan │ Kabul │ POINT (69.18131420000002 34.5186361) │ 26 | │ Angola │ Luanda │ POINT (13.2324812 -8.836340260000002) │ 27 | │ Albania │ Tirana │ POINT (19.818883 41.3275407) │ 28 | │ United Arab Emirates │ Abu Dhabi │ POINT (54.3665934 24.466683599999996) │ 29 | │ United Arab Emirates │ Dubai │ POINT (55.2780285 25.231942000000004) │ 30 | │ Argentina │ Buenos Aires │ POINT (-58.39947719999999 -34.6005557) │ 31 | │ Armenia │ Yerevan │ POINT (44.5116055 40.1830966) │ 32 | │ Australia │ Canberra │ POINT (149.129026 -35.2830285) │ 33 | │ Australia │ Melbourne │ POINT (144.97307 -37.8180855) │ 34 | │ Australia │ Sydney │ POINT (151.183234 -33.9180651) │ 35 | │ Austria │ Vaduz │ POINT (9.51666947 47.1337238) │ 36 | │ Austria │ Vienna │ POINT (16.364693100000004 48.2019611) │ 37 | │ Azerbaijan │ Baku │ POINT (49.8602713 40.3972179) │ 38 | │ Burundi │ Bujumbura │ POINT (29.360006100000003 -3.3760872200000005) │ 39 | │ Belgium │ Brussels │ POINT (4.33137075 50.8352629) │ 40 | │ Benin │ Porto-Novo │ POINT (2.6166255300000003 6.483310970000001) │ 41 | │ Benin │ Cotonou │ POINT (2.51804474 6.40195442) │ 42 | │ Burkina Faso │ Ouagadougou │ POINT (-1.52666961 12.3722618) │ 43 | │ Bangladesh │ Dhaka │ POINT (90.4066336 23.7250056) │ 44 | │ Bulgaria │ Sofia │ POINT (23.314708199999995 42.6852953) │ 45 | │ · │ · │ · │ 46 | │ · │ · │ · │ 47 | │ · │ · │ · │ 48 | │ Ukraine │ Kiev │ POINT (30.514682099999998 50.4353132) │ 49 | │ United States of America │ San Francisco │ POINT (-122.417169 37.7691956) │ 50 | │ United States of America │ Denver │ POINT (-104.98596200000001 39.7411339) │ 51 | │ United States of America │ Houston │ POINT (-95.3419251 29.821920199999994) │ 52 | │ United States of America │ Miami │ POINT (-80.2260519 25.7895566) │ 53 | │ United States of America │ Atlanta │ POINT (-84.4018952 33.8319597) │ 54 | │ United States of America │ Chicago │ POINT (-87.7520008 41.8319365) │ 55 | │ United States of America │ Los Angeles │ POINT (-118.181926 33.991924100000006) │ 56 | │ United States of America │ Washington, D.C. 
│ POINT (-77.0113644 38.9014952) │ 57 | │ United States of America │ New York │ POINT (-73.9819628 40.75192489999999) │ 58 | │ Uzbekistan │ Tashkent │ POINT (69.292987 41.3136477) │ 59 | │ Venezuela │ Caracas │ POINT (-66.9189831 10.502944399999999) │ 60 | │ Vietnam │ Hanoi │ POINT (105.848068 21.035273099999998) │ 61 | │ Yemen │ Sanaa │ POINT (44.20464750000001 15.356679200000002) │ 62 | │ South Africa │ Bloemfontein │ POINT (26.2299129 -29.119993899999994) │ 63 | │ South Africa │ Pretoria │ POINT (28.2274832 -25.7049747) │ 64 | │ South Africa │ Johannesburg │ POINT (28.028063900000003 -26.168098900000004) │ 65 | │ South Africa │ Cape Town │ POINT (18.433042299999997 -33.9180651) │ 66 | │ Zambia │ Lusaka │ POINT (28.281381699999997 -15.4146984) │ 67 | │ Zimbabwe │ Harare │ POINT (31.0427636 -17.8158438) │ 68 | ├──────────────────────────┴──────────────────┴────────────────────────────────────────────────┤ 69 | │ 210 rows (40 shown) 3 columns │ 70 | └──────────────────────────────────────────────────────────────────────────────────────────────┘ 71 | ``` 72 | 73 | ## Installation 74 | 75 | The geography extension is not currently a community extension (although it could be in the future!). To use it, you'll have to grab a binary from the CI job on the main branch and load it after allowing 76 | unsigned extensions in your DuckDB session. 77 | 78 | ```python 79 | import duckdb 80 | 81 | con = duckdb.connect(config={"allow_unsigned_extensions": True}) 82 | con.sql("INSTALL '/path/to/geography.duckdb_extension'") 83 | con.sql("LOAD geography") 84 | con.sql("SELECT 'POINT (-64 45)'::GEOGRAPHY") 85 | #> ┌─────────────────────────────────────┐ 86 | #> │ CAST('POINT (-64 45)' AS GEOGRAPHY) │ 87 | #> │ geography │ 88 | #> ├─────────────────────────────────────┤ 89 | #> │ POINT (-64 44.99999999999999) │ 90 | #> └─────────────────────────────────────┘ 91 | ``` 92 | 93 | ## Types 94 | 95 | The geography extension defines the following types: 96 | 97 | - `GEOGRAPHY`: A (multi)point, (multi)linestring, (multi)polygon, or an arbitrary 98 | collection of those where coordinates are represented as geodetic longitude, latitude on 99 | the WGS84 ellipsoid and edges are represented as geodesics approximated on the 100 | sphere. This is exactly the same as the definition of coordinates and edges in 101 | [BigQuery Geography](https://cloud.google.com/bigquery/docs/geospatial-data#coordinate_systems_and_edges). 102 | 103 | The underlying representation of the `GEOGRAPHY` type is a `BLOB`. The exact 104 | packing of bytes in this blob is not currently guaranteed but is intended to 105 | be documented when stable such that other libraries can decode the value 106 | independently. 107 | 108 | - `S2_CELL`: A cell in [S2's cell indexing system](http://s2geometry.io/devguide/s2cell_hierarchy). 109 | Briefly, this is a way to encode every ~2cm square on earth with an unsigned 64-bit 110 | integer. The indexing system is hierarchical with 111 | [31 levels](http://s2geometry.io/resources/s2cell_statistics). 112 | 113 | - `S2_CELL_CENTER`: The center of an `S2_CELL`. This shares a physical representation 114 | with the `S2_CELL` but has a different logical meaning (a point rather than a polygon). 115 | This is a compact mechanism to encode a point (8 bytes) and can be more efficiently 116 | compared for intersection and containment against an `S2_CELL` or `S2_CELL_UNION`.
117 | For maximum efficiency, always store points as cell centers (they can be loaded 118 | directly from WKB using `s2_cellfromwkb()`, created from longitude and latitude 119 | with `s2_cellfromlonlat()`, or cast from an existing `GEOGRAPHY`). 120 | 121 | - `S2_CELL_UNION`: A normalized list of `S2_CELL`s. This can be used to 122 | approximate a polygon and is used internally as a rapid mechanism for 123 | approximating the bounds of a `GEOGRAPHY` in a way that is more efficient 124 | to compare for possible intersection. This covering can be generated 125 | with `s2_covering()`. 126 | 127 | ## Functions 128 | 129 | Currently implemented functions are listed in the 130 | [function reference](docs/function-reference.md). Documentation is a work in progress! 131 | Note that all types listed above are implicitly castable to `GEOGRAPHY` such that 132 | you can use them with any function that accepts a `GEOGRAPHY`. In general, functions 133 | are intended to have the same behaviour as the equivalent `ST_xx()` function 134 | (if it exists). 135 | 136 | If you need a function that is missing, open an issue (most functions have already 137 | been ported to the underlying C++ library and just aren't wired up to DuckDB yet). 138 | 139 | ## Building 140 | 141 | To build the extension, clone the repository with submodules: 142 | 143 | ``` shell 144 | git clone --recurse-submodules https://github.com/paleolimbot/duckdb-geography.git 145 | ``` 146 | 147 | ...or if you forget to clone the submodules or you're using VSCode to do your checkout, you can run: 148 | 149 | ``` shell 150 | git submodule init 151 | git submodule update --checkout 152 | ``` 153 | 154 | A quick-and-dirty way to get your build up and running is to run `make`: 155 | 156 | ```sh 157 | make 158 | ``` 159 | 160 | The main binaries that will be built are: 161 | 162 | ```sh 163 | ./build/release/duckdb 164 | ./build/release/test/unittest 165 | ./build/release/extension/geography/geography.duckdb_extension 166 | ``` 167 | 168 | - `duckdb` is the binary for the DuckDB shell with the extension code automatically loaded. 169 | - `unittest` is the test runner of DuckDB. Again, the extension is already linked into the binary. 170 | - `geography.duckdb_extension` is the loadable binary as it would be distributed. 171 | 172 | If you'd like to use VSCode with the integration provided by the CMake/clangd extension, you 173 | can run: 174 | 175 | ``` shell 176 | cp CMakeUserPresets.json duckdb/ 177 | ``` 178 | 179 | ...and ensure that `.vscode/settings.json` contains: 180 | 181 | ``` json 182 | { 183 | "cmake.sourceDirectory": "${workspaceFolder}/duckdb" 184 | } 185 | ``` 186 | 187 | Then choose *Developer: Reload window* from the command palette and choose the 188 | *Extension Debug* preset. 189 | 190 | See the [README in the docs directory](docs/README.md) for instructions to build 191 | the documentation. 192 | 193 | ## Running the extension 194 | 195 | To run the extension code, simply start the shell with `./build/release/duckdb` 196 | (if you're using `make` to build) or `./build/duckdb` (if you're using CMake 197 | via VSCode). 198 | 199 | Now we can use the features from the extension directly in DuckDB. 200 | 201 | ## Running the tests 202 | 203 | Different tests can be created for DuckDB extensions. Tests are written in 204 | SQL in `./test/sql`. These SQL tests can be run using `make test` (if using 205 | make) or `./test_local.sh` (if using CMake via VSCode).
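For reference, these SQL tests use DuckDB's sqllogictest format. A minimal sketch of what a new file in `./test/sql` could look like (the file name and header comments are hypothetical; the query and its expected result are taken from the `s2_isempty` example in the function reference):

```
# name: test/sql/my_new_feature.test (hypothetical file name)
# group: [geography]

require geography

query I
SELECT s2_isempty('POINT(0 0)');
----
false
```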
206 | 207 | ## Debugging 208 | 209 | You can debug an interactive SQL session by launching it with `gdb` or `lldb`: 210 | 211 | ``` shell 212 | lldb build/duckdb 213 | ``` 214 | 215 | ...or you can use the CodeLLDB extension (Command Palette: *LLDB: Attach to process*) 216 | to launch a VSCode interactive debugger launched in a terminal. 217 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Test Data 3 | 4 | Generated with: 5 | 6 | ```r 7 | library(s2) 8 | 9 | cities <- s2_data_tbl_cities 10 | cities$geometry <- s2_as_text(cities$geometry, precision = 9) 11 | cities |> 12 | readr::write_tsv("~/Desktop/rscratch/duckdb_s2/data/cities.tsv") 13 | 14 | countries <- s2_data_tbl_countries 15 | countries$geometry <- countries$geometry |> 16 | s2_rebuild(options = s2_options(snap = s2_snap_precision(1e6), duplicate_edges = FALSE, validate = TRUE)) 17 | countries$geometry <- s2_as_text(countries$geometry, precision = 9) 18 | 19 | countries |> 20 | readr::write_tsv("~/Desktop/rscratch/duckdb_s2/data/countries.tsv") 21 | ``` 22 | -------------------------------------------------------------------------------- /data/cities.tsv: -------------------------------------------------------------------------------- 1 | name population geog 2 | Vatican City 832 POINT (12.4533865 41.9032822) 3 | San Marino 29000 POINT (12.4417702 43.9360958) 4 | Vaduz 5342 POINT (9.51666947 47.1337238) 5 | Lobamba 4557 POINT (31.1999971 -26.4666675) 6 | Luxembourg 76684 POINT (6.13000281 49.6116604) 7 | Palikir 4645 POINT (158.149974 6.9166437) 8 | Majuro 20500 POINT (171.38 7.10300431) 9 | Funafuti 4749 POINT (179.216647 -8.516652) 10 | Melekeok 7026 POINT (134.626548 7.48739617) 11 | Bir Lehlou 200 POINT (-9.65252222 26.1191667) 12 | Monaco 36371 POINT (7.40691317 43.7396457) 13 | Tarawa 22534 POINT (173.017571 1.33818751) 14 | Moroni 42872 POINT (43.2402441 -11.7041577) 15 | Andorra 22256 POINT (1.51648596 42.5000014) 16 | Port-of-Spain 49031 POINT (-61.5170309 10.6519971) 17 | Kigali 745261 POINT (30.0585859 -1.95164421) 18 | Mbabane 76218 POINT (31.1333345 -26.3166508) 19 | Juba 111975 POINT (31.5800256 4.8299752) 20 | The Hague 501725 POINT (4.2699613 52.0800368) 21 | Ljubljana 255115 POINT (14.514969 46.0552883) 22 | Bratislava 373687 POINT (17.1169808 48.1500183) 23 | Doha 731310 POINT (51.5329679 25.286556) 24 | Podgorica 136473 POINT (19.2663069 42.4659725) 25 | Sri Jawewardenepura Kotte 115826 POINT (79.949993 6.90000388) 26 | Baguio City 272714 POINT (120.569943 16.4299907) 27 | Dodoma 180541 POINT (35.7500036 -6.18330605) 28 | Bern 121631 POINT (7.46697546 46.9166828) 29 | Laayoune 176365 POINT (-13.2000059 27.1499823) 30 | Pristina 198214 POINT (21.1659843 42.6667096) 31 | Roseau 16571 POINT (-61.387013 15.3010156) 32 | Djibouti 604013 POINT (43.1480017 11.5950145) 33 | Putrajaya 50000 POINT (101.701947 2.91401979) 34 | Kyoto 1459640 POINT (135.748052 35.0319381) 35 | Banjul 34589 POINT (-16.5917015 13.4538765) 36 | Skopje 474889 POINT (21.4334615 42.0000061) 37 | Bridgetown 96578 POINT (-59.6165267 13.1020026) 38 | Porto-Novo 234168 POINT (2.61662553 6.48331097) 39 | Bujumbura 331700 POINT (29.3600061 -3.37608722) 40 | Kingstown 24518 POINT (-61.2120624 13.1482788) 41 | Castries 10634 POINT (-61.0000082 14.0019735) 42 | Basseterre 15500 POINT (-62.7170093 17.3020305) 43 | Port Louis 148416 POINT (57.4999939 -20.1666386) 44 | Saint George's 27343 POINT (-61.7416432 
12.0526334) 45 | Manama 157474 POINT (50.5830517 26.2361363) 46 | Saint John's 24226 POINT (-61.8500338 17.1180365) 47 | Montevideo 5324 POINT (-56.1729981 -34.8560957) 48 | Lome 749700 POINT (1.22081126 6.13388293) 49 | Tunis 728453 POINT (10.1796781 36.8027781) 50 | Abu Dhabi 560230 POINT (54.3665934 24.4666836) 51 | Ashgabat 577982 POINT (58.3832991 37.9499949) 52 | Lusaka 1267440 POINT (28.2813817 -15.4146984) 53 | Harare 1542813 POINT (31.0427636 -17.8158438) 54 | Dili 193563 POINT (125.579456 -8.55938841) 55 | Port Vila 35901 POINT (168.316641 -17.7333504) 56 | Tegucigalpa 850848 POINT (-87.2194752 14.1039908) 57 | Georgetown 235017 POINT (-58.1670286 6.80197369) 58 | Reykjavik 113906 POINT (-21.9500145 64.1500236) 59 | Port-au-Prince 1234742 POINT (-72.3379804 18.5429705) 60 | Kampala 1353189 POINT (32.5813777 0.318604813) 61 | Paramaribo 223757 POINT (-55.1670309 5.83503013) 62 | Niamey 742791 POINT (2.11471019 13.5186518) 63 | Dushanbe 679400 POINT (68.7738794 38.5600352) 64 | Asuncion 11693 POINT (-57.643451 -25.2944571) 65 | Managua 920000 POINT (-86.2704375 12.1549624) 66 | Freetown 13768 POINT (-13.2361616 8.47195727) 67 | Islamabad 601600 POINT (73.1646886 33.7019418) 68 | Kathmandu 895000 POINT (85.3146964 27.7186378) 69 | Bloemfontein 456669 POINT (26.2299129 -29.1199939) 70 | Pretoria 1338000 POINT (28.2274832 -25.7049747) 71 | Port Moresby 251136 POINT (147.192504 -9.46470783) 72 | Honiara 56298 POINT (159.949766 -9.4379943) 73 | Panama City 408168 POINT (-79.534983 8.96996305) 74 | Rabat 1655753 POINT (-6.83640816 34.0253073) 75 | Chisinau 635994 POINT (28.8577111 47.0050236) 76 | Maputo 1191613 POINT (32.5872171 -25.9533316) 77 | Mogadishu 875388 POINT (45.3647318 2.06862719) 78 | Muscat 586861 POINT (58.5933121 23.6133248) 79 | Colombo 217000 POINT (79.8577506 6.93196576) 80 | Ulaanbaatar 769612 POINT (106.91467 47.9186193) 81 | Windhoek 262796 POINT (17.0835461 -22.5700061) 82 | Abuja 162135 POINT (7.53138214 9.08527901) 83 | Bissau 388028 POINT (-15.5983608 11.8650238) 84 | Amman 1060000 POINT (35.9313541 31.9519711) 85 | Vilnius 507029 POINT (25.3166353 54.6833663) 86 | Riga 705033 POINT (24.0999654 56.9500238) 87 | Bishkek 804212 POINT (74.5832584 42.8750253) 88 | Maseru 118355 POINT (27.4832731 -29.3166744) 89 | Antananarivo 1391433 POINT (47.514678 -18.9146915) 90 | Quito 1399814 POINT (-78.501997 -0.213042322) 91 | San Jose 1724 POINT (-84.0859972 9.93695829) 92 | San Salvador 2807 POINT (-89.2049871 13.7119475) 93 | Kingston 664973 POINT (-76.7674337 17.9770766) 94 | Ndjamena 681387 POINT (15.0472025 12.1150424) 95 | Malabo 155963 POINT (8.78327755 3.75001528) 96 | Asmara 563930 POINT (38.9333235 15.3333393) 97 | Zagreb 698966 POINT (15.9999947 45.8000067) 98 | Tallinn 340027 POINT (24.7280407 59.4338774) 99 | Lilongwe 646750 POINT (33.783302 -13.9832951) 100 | Guatemala 994938 POINT (-90.5289114 14.6230805) 101 | Libreville 483355 POINT (9.45796505 0.38538861) 102 | Suva 88271 POINT (178.441707 -18.1330159) 103 | Valparaiso 15938 POINT (-71.6229595 -33.0458186) 104 | Nouakchott 661400 POINT (-15.9753404 18.086427) 105 | Bamako 1297281 POINT (-8.00198496 12.6519605) 106 | Beirut 1712125 POINT (35.5077624 33.873921) 107 | Tbilisi 1005257 POINT (44.7888496 41.7269558) 108 | Astana 325021 POINT (71.4277742 51.1811253) 109 | Vientiane 570348 POINT (102.59998 17.9666927) 110 | Brazzaville 1163890 POINT (15.2827436 -4.25723991) 111 | Conakry 1494000 POINT (-13.6821809 9.53346871) 112 | Yamoussoukro 194530 POINT (-5.27550256 6.81838096) 113 | Ottawa 812129 POINT 
(-75.7019612 45.4186427) 114 | Belgrade 1099000 POINT (20.4660448 44.8205913) 115 | Bandar Seri Begawan 140000 POINT (114.933284 4.88333111) 116 | Sucre 221736 POINT (-65.2595156 -19.0409708) 117 | Belmopan 13381 POINT (-88.767073 17.2520335) 118 | Bangui 622771 POINT (18.5582881 4.36664431) 119 | Yaounde 1060587 POINT (11.5147049 3.86864652) 120 | Tirana 421286 POINT (19.818883 41.3275407) 121 | Yerevan 1093485 POINT (44.5116055 40.1830966) 122 | Baku 1892000 POINT (49.8602713 40.3972179) 123 | Phnom Penh 1466000 POINT (104.914689 11.551976) 124 | La Paz 812799 POINT (-68.151931 -16.4960278) 125 | Cotonou 690584 POINT (2.51804474 6.40195442) 126 | Sofia 874827 POINT (23.3147082 42.6852953) 127 | Minsk 1577138 POINT (27.5646813 53.9019233) 128 | Thimphu 79185 POINT (89.639014 27.4729859) 129 | Gaborone 159243 POINT (25.9119478 -24.6463135) 130 | Canberra 234032 POINT (149.129026 -35.2830285) 131 | Ouagadougou 835457 POINT (-1.52666961 12.3722618) 132 | Sarajevo 628902 POINT (18.3830017 43.8500224) 133 | Naypyidaw 194824 POINT (96.1166727 19.7685029) 134 | Nukualofa 23658 POINT (-175.220564 -21.1385124) 135 | Hargeysa 247018 POINT (44.06531 9.5600224) 136 | Victoria 22881 POINT (55.4499898 -4.61663165) 137 | Sao Tome 56166 POINT (6.73332515 0.333402119) 138 | Apia 37708 POINT (-171.738642 -13.841545) 139 | Valletta 6966 POINT (14.5147107 35.8997325) 140 | Male 103693 POINT (73.4999475 4.16670819) 141 | Jerusalem 801000 POINT (35.2066259 31.7784078) 142 | Praia 88859 POINT (-23.5166889 14.916698) 143 | Nassau 160966 POINT (-77.3500438 25.0833901) 144 | Nicosia 200452 POINT (33.3666349 35.1666765) 145 | Wellington 393400 POINT (174.783266 -41.2999879) 146 | Hanoi 1431270 POINT (105.848068 21.0352731) 147 | Ankara 3307379 POINT (32.8624458 39.9291844) 148 | Budapest 1679000 POINT (19.0813748 47.5019522) 149 | Sanaa 1835853 POINT (44.2046475 15.3566792) 150 | Bucharest 1742194 POINT (26.0980008 44.4353177) 151 | Damascus 2466000 POINT (36.29805 33.5019799) 152 | Lisbon 517802 POINT (-9.14681216 38.7246687) 153 | Khartoum 1974647 POINT (32.5322334 15.5900241) 154 | Oslo 580000 POINT (10.7480333 59.9186361) 155 | Warsaw 1702139 POINT (20.9980537 52.2519465) 156 | Pyongyang 2498797 POINT (125.752745 39.0213846) 157 | Dar es Salaam 2698652 POINT (39.266396 -6.79806674) 158 | Dublin 968976 POINT (-6.25085154 53.335007) 159 | Monrovia 785662 POINT (-10.7996604 6.31458165) 160 | Kuala Lumpur 1448000 POINT (101.698037 3.16861173) 161 | Havana 1990917 POINT (-82.366128 23.1339047) 162 | Prague 2087 POINT (14.4640339 50.0852829) 163 | Kuwait 60064 POINT (47.9763553 29.3716635) 164 | Santo Domingo 2873 POINT (-69.9020309 18.4720187) 165 | Accra 1963264 POINT (-0.218661599 5.55198046) 166 | Tripoli 229398 POINT (13.1800118 32.8925) 167 | Tel Aviv-Yafo 378358 POINT (34.7680659 32.0819373) 168 | Helsinki 558457 POINT (24.9321805 60.1775092) 169 | Kobenhavn 1085000 POINT (12.5615399 55.68051) 170 | Abidjan 3190395 POINT (-4.04199412 5.32194283) 171 | Brasilia 2562963 POINT (-47.9179981 -15.7813944) 172 | Brussels 1019022 POINT (4.33137075 50.8352629) 173 | Dhaka 7000940 POINT (90.4066336 23.7250056) 174 | Luanda 1951272 POINT (13.2324812 -8.83634026) 175 | Algiers 1977663 POINT (3.04860667 36.7650107) 176 | Rangoon 3301820 POINT (96.1647318 16.7853) 177 | San Francisco 732072 POINT (-122.417169 37.7691956) 178 | Denver 1548599 POINT (-104.985962 39.7411339) 179 | Houston 3647574 POINT (-95.3419251 29.8219202) 180 | Miami 382894 POINT (-80.2260519 25.7895566) 181 | Atlanta 422908 POINT (-84.4018952 33.8319597) 
182 | Chicago 2841952 POINT (-87.7520008 41.8319365) 183 | Caracas 1815679 POINT (-66.9189831 10.5029444) 184 | Kiev 1662508 POINT (30.5146821 50.4353132) 185 | Dubai 1137347 POINT (55.2780285 25.231942) 186 | Tashkent 1978028 POINT (69.292987 41.3136477) 187 | Madrid 50437 POINT (-3.68529754 40.4019721) 188 | Geneva 192385 POINT (6.14002803 46.2100075) 189 | Stockholm 1253309 POINT (18.0953889 59.3527058) 190 | Bangkok 5104476 POINT (100.514699 13.7519451) 191 | Lima 6758234 POINT (-77.052008 -12.0460668) 192 | Dakar 2476400 POINT (-17.475076 14.7177776) 193 | Johannesburg 2026469 POINT (28.0280639 -26.1680989) 194 | Amsterdam 741636 POINT (4.91469432 52.3519145) 195 | Casablanca 3144909 POINT (-7.61831329 33.6019221) 196 | Seoul 9796000 POINT (126.997785 37.568295) 197 | Manila 3077575 POINT (120.980271 14.6061048) 198 | Monterrey 1122874 POINT (-100.331931 25.671941) 199 | Berlin 3094014 POINT (13.3996028 52.5237645) 200 | Urumqi 1508225 POINT (87.5730598 43.8069581) 201 | Chengdu 3950437 POINT (104.068074 30.6719459) 202 | Osaka 2592413 POINT (135.458199 34.7519811) 203 | Kinshasa 5565703 POINT (15.313026 -4.32777824) 204 | New Delhi 317797 POINT (77.19998 28.600023) 205 | Bangalore 5104047 POINT (77.5580639 12.971941) 206 | Athens 729137 POINT (23.7313752 37.9852721) 207 | Baghdad 5054000 POINT (44.3919229 33.3405944) 208 | Addis Ababa 2757729 POINT (38.6980586 9.03525622) 209 | Tehran 7153309 POINT (51.4223982 35.6738886) 210 | Vancouver 603502 POINT (-123.12359 49.2753624) 211 | Toronto 3934421 POINT (-79.4219667 43.7019257) 212 | Buenos Aires 10929146 POINT (-58.3994772 -34.6005557) 213 | Kabul 3043532 POINT (69.1813142 34.5186361) 214 | Vienna 1731000 POINT (16.3646931 48.2019611) 215 | Melbourne 93625 POINT (144.97307 -37.8180855) 216 | Taipei 2618772 POINT (121.568333 25.0358333) 217 | Auckland 395982 POINT (174.763027 -36.8480549) 218 | Los Angeles 3694820 POINT (-118.181926 33.9919241) 219 | Washington, D.C. 
552433 POINT (-77.0113644 38.9014952) 220 | New York 8008278 POINT (-73.9819628 40.7519249) 221 | London 7421209 POINT (-0.118667702 51.5019406) 222 | Istanbul 9945610 POINT (29.0080557 41.106942) 223 | Riyadh 4205961 POINT (46.7707958 24.642779) 224 | Cape Town 2432858 POINT (18.4330423 -33.9180651) 225 | Moscow 10452000 POINT (37.613577 55.75411) 226 | Mexico City 10811002 POINT (-99.1329341 19.4443883) 227 | Lagos 1536 POINT (3.38958521 6.44520751) 228 | Rome 35452 POINT (12.4813126 41.8979015) 229 | Beijing 7480601 POINT (116.38634 39.9308381) 230 | Nairobi 2750547 POINT (36.814711 -1.28140088) 231 | Jakarta 8540121 POINT (106.827492 -6.17247185) 232 | Bogota 6333661 POINT (-74.0852898 4.59836942) 233 | Cairo 7734614 POINT (31.2480224 30.0519062) 234 | Shanghai 14608512 POINT (121.434559 31.2183983) 235 | Tokyo 8336599 POINT (139.749462 35.6869628) 236 | Mumbai 12691836 POINT (72.8550434 19.0189362) 237 | Paris 11177 POINT (2.33138947 48.8686388) 238 | Santiago 46611 POINT (-70.6689867 -33.448068) 239 | Kolkata 4631392 POINT (88.3227298 22.4969152) 240 | Rio de Janeiro 2010175 POINT (-43.2269667 -22.9230773) 241 | Sao Paulo 10021295 POINT (-46.6269658 -23.5567337) 242 | Sydney 3641422 POINT (151.183234 -33.9180651) 243 | Singapore 3289529 POINT (103.853875 1.29497933) 244 | Hong Kong 4551579 POINT (114.183063 22.3069268) 245 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Build duckdb-geography documentation 3 | 4 | Install the requirements: 5 | 6 | ```shell 7 | pip install -r requirements.txt 8 | ``` 9 | 10 | Build `duckdb` with the extension statically linked or install it 11 | into some existing `duckdb` environment: 12 | 13 | ```shell 14 | make debug 15 | ``` 16 | 17 | Run `duckdoc.py`: 18 | 19 | ```shell 20 | python duckdoc.py \ 21 | --extension geography \ 22 | --output function-reference.md \ 23 | --run-examples 24 | ``` 25 | 26 | This will update the rendered documentation (which is currently just checked in 27 | as a `function-reference.md` in this directory). 28 | -------------------------------------------------------------------------------- /docs/UPDATING.md: -------------------------------------------------------------------------------- 1 | # Extension updating 2 | When cloning this template, the target version of DuckDB should be the latest stable release of DuckDB. However, there 3 | will inevitably come a time when a new DuckDB is released and the extension repository needs updating. This process goes 4 | as follows: 5 | 6 | - Bump submodules 7 | - `./duckdb` should be set to latest tagged release 8 | - `./extension-ci-tools` should be set to updated branch corresponding to latest DuckDB release. So if you're building for DuckDB `v1.1.0` there will be a branch in `extension-ci-tools` named `v1.1.0` to which you should check out. 
9 | - Bump versions in `./github/workflows` 10 | - `duckdb_version` input in `duckdb-stable-build` job in `MainDistributionPipeline.yml` should be set to latest tagged release 11 | - `duckdb_version` input in `duckdb-stable-deploy` job in `MainDistributionPipeline.yml` should be set to latest tagged release 12 | - the reusable workflow `duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml` for the `duckdb-stable-build` job should be set to latest tagged release 13 | 14 | # API changes 15 | DuckDB extensions built with this extension template are built against the internal C++ API of DuckDB. This API is not guaranteed to be stable. 16 | What this means for extension development is that when updating your extensions DuckDB target version using the above steps, you may run into the fact that your extension no longer builds properly. 17 | 18 | Currently, DuckDB does not (yet) provide a specific change log for these API changes, but it is generally not too hard to figure out what has changed. 19 | 20 | For figuring out how and why the C++ API changed, we recommend using the following resources: 21 | - DuckDB's [Release Notes](https://github.com/duckdb/duckdb/releases) 22 | - DuckDB's history of [Core extension patches](https://github.com/duckdb/duckdb/commits/main/.github/patches/extensions) 23 | - The git history of the relevant C++ Header file of the API that has changed 24 | -------------------------------------------------------------------------------- /docs/duckdoc.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | import re 4 | import tempfile 5 | from pathlib import Path 6 | 7 | from jinja2 import Environment, FileSystemLoader 8 | 9 | 10 | def main(extension_name, duckdb_path=None, output_path=None, run_examples=False): 11 | if not duckdb_path: 12 | duckdb_path = find_duckdb(extension_name) 13 | 14 | functions = query_functions(duckdb_path, extension_name) 15 | parse_functions(functions) 16 | 17 | if run_examples: 18 | run_function_examples(functions, duckdb_path, extension_name) 19 | 20 | context = generate_context(functions) 21 | render_all(context, output_path) 22 | 23 | 24 | def render_all(context, output_path): 25 | this_dir = Path(__file__).parent 26 | env = Environment(loader=FileSystemLoader(this_dir)) 27 | 28 | template = env.get_template("function-reference.md.jinja") 29 | content = template.render(context) 30 | 31 | # Canonicalize multiple newlines 32 | content = re.sub("\n\n+", "\n\n", content) 33 | 34 | with open(output_path, "w") as out: 35 | out.write(content.rstrip() + "\n") 36 | 37 | 38 | def generate_context(functions): 39 | category_names = set(fun["category"] for fun in functions) 40 | categories = [ 41 | { 42 | "name": category, 43 | "functions": [fun for fun in functions if fun["category"] == category], 44 | } 45 | for category in sorted(category_names) 46 | ] 47 | 48 | return {"categories": categories} 49 | 50 | 51 | def parse_functions(functions): 52 | for func in functions: 53 | if "category" in func["tags"]: 54 | func["category"] = func["tags"]["category"] 55 | else: 56 | func["category"] = "-".join(["other", func["type"], "functions"]) 57 | 58 | if "description" in func and func["description"]: 59 | desc_lines = func["description"].strip().splitlines() 60 | func["summary"] = desc_lines[0] 61 | func["description"] = "\n".join(desc_lines[1:]) 62 | 63 | 64 | def run_function_examples(functions, duckdb_path, extension_name): 65 | for func in functions: 66 | if "example" 
in func and func["example"]: 67 | func["example"] = run_examples(duckdb_path, extension_name, func["example"]) 68 | 69 | 70 | def query_functions(duckdb_path, extension_name): 71 | sql = FUNCTION_DEF_SQL.replace("$EXTENSION_NAME$", extension_name) 72 | proc = subprocess.run( 73 | [ 74 | duckdb_path, 75 | "-noheader", 76 | "-list", 77 | "-c", 78 | f"INSTALL json; LOAD json; LOAD {extension_name};" + sql, 79 | ], 80 | capture_output=True, 81 | ) 82 | 83 | if proc.returncode != 0: 84 | raise ValueError("Function query failed\n---\n" + proc.stderr.decode()) 85 | elif not proc.stdout.strip(): 86 | raise ValueError("Function query returned zero functions") 87 | 88 | return [json.loads(line) for line in proc.stdout.splitlines()] 89 | 90 | 91 | def run_examples(duckdb_path, extension_name, example_sql): 92 | with tempfile.TemporaryDirectory() as tdir: 93 | example_sql = example_sql.strip() 94 | examples = example_sql.split("\n----") 95 | example_results = [ 96 | run_example(duckdb_path, extension_name, example, tdir) 97 | for example in examples 98 | ] 99 | return "\n\n".join(example_results) 100 | 101 | 102 | def run_example(duckdb_path, extension_name, example_sql, cwd=None): 103 | example_sql = example_sql.strip() 104 | proc = subprocess.run( 105 | [ 106 | Path(duckdb_path).resolve(), 107 | "-c", 108 | f"LOAD {extension_name};" + example_sql, 109 | ], 110 | capture_output=True, 111 | cwd=cwd, 112 | ) 113 | 114 | if proc.returncode != 0: 115 | raise ValueError( 116 | "Example query failed. Query was:\n---\n" 117 | + example_sql 118 | + "\n---\n" 119 | + proc.stderr.decode() 120 | ) 121 | 122 | out_lines = ["--" + line for line in proc.stdout.decode().splitlines()] 123 | return "\n".join([example_sql] + out_lines) 124 | 125 | 126 | def find_duckdb(extension_name): 127 | this_dir = Path(__file__).parent 128 | build_dir = this_dir.parent / "build" 129 | for possible in [ 130 | build_dir / "duckdb", 131 | build_dir / "debug" / "duckdb", 132 | "duckdb", 133 | ]: 134 | 135 | if subprocess.run( 136 | [possible, "-c", f"LOAD {extension_name};"], capture_output=True 137 | ): 138 | return possible 139 | 140 | raise ValueError(f"Can't find duckdb that can load extension '{extension_name}'") 141 | 142 | 143 | def generate_test_functions(): 144 | return [ 145 | { 146 | "name": "s2_data_city", 147 | "type": "scalar", 148 | "summary": "Return a city from the example data or error if no such city exists.", 149 | "description": "An extended summary of the city", 150 | "example": "SELECT s2_data_city('Toronto');", 151 | "signatures": [ 152 | {"return": "GEOGRAPHY", "params": []}, 153 | { 154 | "return": "GEOGRAPHY", 155 | "params": [{"name": "city", "type": "VARCHAR"}], 156 | }, 157 | { 158 | "return": "GEOGRAPHY", 159 | "params": [ 160 | {"name": "city", "type": "VARCHAR"}, 161 | {"name": "foofy", "type": "INTEGER"}, 162 | ], 163 | }, 164 | ], 165 | "category": "example-data", 166 | }, 167 | { 168 | "name": "s2_data_country", 169 | "summary": "Return a country from the example data or error if no such country exists.", 170 | "signatures": [ 171 | { 172 | "return": "GEOGRAPHY", 173 | "params": [{"name": "country", "type": "VARCHAR"}], 174 | }, 175 | ], 176 | "category": "example-data", 177 | }, 178 | { 179 | "name": "s2_x", 180 | "summary": "Return the longitude of a point geography or NaN if none exist.s", 181 | "signatures": [ 182 | { 183 | "return": "DOUBLE", 184 | "params": [{"name": "geog", "type": "GEOGRAPHY"}], 185 | }, 186 | ], 187 | "category": "accessors", 188 | }, 189 | ] 190 | 191 | 192 | 
FUNCTION_DEF_SQL = """ 193 | SELECT 194 | json({ 195 | name: function_name, 196 | type: function_type, 197 | signatures: signatures, 198 | tags: func_tags, 199 | description: description, 200 | example: example 201 | }) 202 | FROM ( 203 | SELECT 204 | function_type, 205 | function_name, 206 | list({ 207 | return: return_type, 208 | params: list_zip(parameters, parameter_types)::STRUCT(name VARCHAR, type VARCHAR)[] 209 | }) as signatures, 210 | any_value(tags) AS func_tags, 211 | any_value(description) AS description, 212 | any_value(example) AS example 213 | FROM duckdb_functions() as funcs 214 | GROUP BY function_name, function_type 215 | HAVING func_tags['ext'] = ['$EXTENSION_NAME$'] 216 | ORDER BY function_name 217 | ); 218 | """ 219 | 220 | 221 | if __name__ == "__main__": 222 | import argparse 223 | import sys 224 | 225 | parser = argparse.ArgumentParser( 226 | description="Render function documentation for a DuckDB extension", 227 | ) 228 | parser.add_argument( 229 | "--extension", 230 | help="The name of the extension for which reference should be rendered", 231 | default="geography", 232 | ) 233 | parser.add_argument( 234 | "--duckdb", 235 | help=( 236 | "The path to the DuckDB executable used to load the " 237 | "desired version of the extension" 238 | ), 239 | default="", 240 | ) 241 | parser.add_argument( 242 | "--run-examples", 243 | help=( 244 | "Run examples and append the commented output. Experimental " 245 | "and currently assumes that the 'example' field is valid SQL separated by " 246 | "four dashes (----) on a new line" 247 | ), 248 | action="store_true", 249 | ) 250 | parser.add_argument( 251 | "-o", "--output", help="The output file path", default="function-reference.md" 252 | ) 253 | 254 | args = parser.parse_args(sys.argv[1:]) 255 | main(args.extension, args.duckdb, args.output, args.run_examples) 256 | -------------------------------------------------------------------------------- /docs/function-reference.md.jinja: -------------------------------------------------------------------------------- 1 | 2 | # Function Reference 3 | 4 | | Function | Summary | 5 | | --- | --- | 6 | {% for category in categories -%} 7 | {% for function in category.functions -%} 8 | | [`{{ function.name }}`](#{{ function.name }}) | {{ function.summary }}| 9 | {% endfor %} 10 | {%- endfor %} 11 | 12 | {% for category in categories -%} 13 | ## {{ category.name | replace('-', ' ') | title }} 14 | 15 | {% for function in category.functions %} 16 | ### {{ function.name }} 17 | 18 | {{ function.summary }} 19 | 20 | ```sql 21 | {% for sig in function.signatures -%} 22 | {{ sig.return }} {{ function.name }}( 23 | {%- for arg in sig.params -%} 24 | {%- if loop.index > 1 %}, {% endif -%} 25 | {{ arg.name }} {{ arg.type }} 26 | {%- endfor -%} 27 | ) 28 | {% endfor -%} 29 | ``` 30 | 31 | {% if function.description -%} 32 | #### Description 33 | 34 | {{ function.description }} 35 | {% endif -%} 36 | 37 | {% if function.example %} 38 | #### Example 39 | 40 | ```sql 41 | {{ function.example }} 42 | ``` 43 | {% endif %} 44 | {%- endfor -%} 45 | {%- endfor -%} 46 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2 2 | -------------------------------------------------------------------------------- /extension_config.cmake: -------------------------------------------------------------------------------- 1 | # This file is included by DuckDB's build system. 
It specifies which extension 2 | # to load 3 | 4 | # Extension from this repo 5 | duckdb_extension_load(geography SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} LOAD_TESTS) 6 | 7 | # Any extra extensions that should be built e.g.: duckdb_extension_load(json) 8 | -------------------------------------------------------------------------------- /function-reference.md: -------------------------------------------------------------------------------- 1 | 2 | # Function Reference 3 | 4 | | Function | Summary | 5 | | --- | --- | 6 | | [`s2_area`](#s2_area) | Returns the area of the geography.| 7 | | [`s2_isempty`](#s2_isempty) | Returns true if the geography is empty.| 8 | | [`s2_length`](#s2_length) | Returns the length of the geography.| 9 | | [`s2_perimeter`](#s2_perimeter) | Returns the perimeter of the geography.| 10 | | [`s2_x`](#s2_x) | Returns the x coordinate of the geography.| 11 | | [`s2_y`](#s2_y) | Returns the y coordinate of the geography.| 12 | | [`s2_covering`](#s2_covering) | Returns the S2 cell covering of the geography.| 13 | | [`s2_covering_fixed_level`](#s2_covering_fixed_level) | Returns the S2 cell covering of the geography with a fixed level.| 14 | | [`s2_arbitrarycellfromwkb`](#s2_arbitrarycellfromwkb) | Convert the first vertex to S2_CELL_CENTER for sorting.| 15 | | [`s2_cell_child`](#s2_cell_child) | | 16 | | [`s2_cell_contains`](#s2_cell_contains) | | 17 | | [`s2_cell_edge_neighbor`](#s2_cell_edge_neighbor) | | 18 | | [`s2_cell_from_token`](#s2_cell_from_token) | | 19 | | [`s2_cell_intersects`](#s2_cell_intersects) | | 20 | | [`s2_cell_level`](#s2_cell_level) | | 21 | | [`s2_cell_parent`](#s2_cell_parent) | | 22 | | [`s2_cell_range_max`](#s2_cell_range_max) | | 23 | | [`s2_cell_range_min`](#s2_cell_range_min) | | 24 | | [`s2_cell_token`](#s2_cell_token) | | 25 | | [`s2_cell_vertex`](#s2_cell_vertex) | Returns the vertex of the S2 cell.| 26 | | [`s2_cellfromlonlat`](#s2_cellfromlonlat) | Convert a lon/lat pair to S2_CELL_CENTER| 27 | | [`s2_cellfromwkb`](#s2_cellfromwkb) | Convert a WKB point directly to S2_CELL_CENTER| 28 | | [`s2_astext`](#s2_astext) | Returns the WKT string of the geography.| 29 | | [`s2_aswkb`](#s2_aswkb) | Returns the WKB blob of the geography.| 30 | | [`s2_format`](#s2_format) | Returns the WKT string of the geography with a given precision.| 31 | | [`s2_geogfromtext`](#s2_geogfromtext) | Returns the geography from a WKT string.| 32 | | [`s2_geogfromwkb`](#s2_geogfromwkb) | Converts a WKB blob to a geography.| 33 | | [`s2_prepare`](#s2_prepare) | Prepares a geography for faster predicate and overlay operations.| 34 | | [`s2_data_city`](#s2_data_city) | | 35 | | [`s2_data_country`](#s2_data_country) | | 36 | | [`s2_difference`](#s2_difference) | Returns the difference of two geographies.| 37 | | [`s2_intersection`](#s2_intersection) | Returns the intersection of two geographies.| 38 | | [`s2_union`](#s2_union) | Returns the union of two geographies.| 39 | | [`s2_contains`](#s2_contains) | Returns true if the first geography contains the second.| 40 | | [`s2_equals`](#s2_equals) | Returns true if the two geographies are equal.| 41 | | [`s2_intersects`](#s2_intersects) | Returns true if the two geographies intersect.| 42 | | [`s2_mayintersect`](#s2_mayintersect) | Returns true if the two geographies may intersect.| 43 | 44 | 45 | ## Accessors 46 | 47 | 48 | ### s2_area 49 | 50 | Returns the area of the geography. 51 | 52 | ```sql 53 | DOUBLE s2_area(geog GEOGRAPHY) 54 | ``` 55 | 56 | 57 | ### s2_isempty 58 | 59 | Returns true if the geography is empty. 
60 | 61 | ```sql 62 | BOOLEAN s2_isempty(geog GEOGRAPHY) 63 | ``` 64 | 65 | 66 | #### Example 67 | 68 | ```sql 69 | SELECT s2_isempty('POINT(0 0)') AS is_empty; 70 | --┌──────────┐ 71 | --│ is_empty │ 72 | --│ boolean │ 73 | --├──────────┤ 74 | --│ false │ 75 | --└──────────┘ 76 | ``` 77 | 78 | ### s2_length 79 | 80 | Returns the length of the geography. 81 | 82 | ```sql 83 | DOUBLE s2_length(geog GEOGRAPHY) 84 | ``` 85 | 86 | 87 | ### s2_perimeter 88 | 89 | Returns the perimeter of the geography. 90 | 91 | ```sql 92 | DOUBLE s2_perimeter(geog GEOGRAPHY) 93 | ``` 94 | 95 | 96 | ### s2_x 97 | 98 | Returns the x coordinate of the geography. 99 | 100 | ```sql 101 | DOUBLE s2_x(geog GEOGRAPHY) 102 | ``` 103 | 104 | 105 | ### s2_y 106 | 107 | Returns the y coordinate of the geography. 108 | 109 | ```sql 110 | DOUBLE s2_y(geog GEOGRAPHY) 111 | ``` 112 | 113 | ## Bounds 114 | 115 | 116 | ### s2_covering 117 | 118 | Returns the S2 cell covering of the geography. 119 | 120 | ```sql 121 | S2_CELL_UNION s2_covering(geog GEOGRAPHY) 122 | ``` 123 | 124 | 125 | #### Example 126 | 127 | ```sql 128 | SELECT s2_covering('POINT(0 0)') AS covering; 129 | --┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ 130 | --│ covering │ 131 | --│ s2_cell_union │ 132 | --├──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤ 133 | --│ [0/022222222222222222222222222222, 0/133333333333333333333333333333, 0/200000000000000000000000000000, 0/311111111… │ 134 | --└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ 135 | ``` 136 | 137 | ### s2_covering_fixed_level 138 | 139 | Returns the S2 cell covering of the geography with a fixed level. 140 | 141 | ```sql 142 | S2_CELL_UNION s2_covering_fixed_level(geog GEOGRAPHY, fixed_level INTEGER) 143 | ``` 144 | 145 | 146 | #### Example 147 | 148 | ```sql 149 | SELECT s2_covering_fixed_level('POINT(0 0)', 4) AS covering; 150 | --┌──────────────────────────────────┐ 151 | --│ covering │ 152 | --│ s2_cell_union │ 153 | --├──────────────────────────────────┤ 154 | --│ [0/0222, 0/1333, 0/2000, 0/3111] │ 155 | --└──────────────────────────────────┘ 156 | ``` 157 | ## Cellops 158 | 159 | 160 | ### s2_arbitrarycellfromwkb 161 | 162 | Convert the first vertex to S2_CELL_CENTER for sorting. 
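In practice this is useful for deriving a coarse, sortable key directly from WKB without fully importing it. A minimal sketch, assuming a hypothetical `places` table with a WKB `geom_wkb` column (output omitted):

```sql
-- 'places' and 'geom_wkb' are hypothetical; the resulting cell works as a spatial sort key
SELECT s2_arbitrarycellfromwkb(geom_wkb) AS cell
FROM places
ORDER BY cell;
```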
163 | 164 | ```sql 165 | S2_CELL_CENTER s2_arbitrarycellfromwkb(wkb BLOB) 166 | ``` 167 | 168 | 169 | ### s2_cell_child 170 | 171 | 172 | 173 | ```sql 174 | S2_CELL s2_cell_child(cell S2_CELL, index TINYINT) 175 | ``` 176 | 177 | 178 | ### s2_cell_contains 179 | 180 | 181 | 182 | ```sql 183 | BOOLEAN s2_cell_contains(cell1 S2_CELL, cell2 S2_CELL) 184 | ``` 185 | 186 | 187 | ### s2_cell_edge_neighbor 188 | 189 | 190 | 191 | ```sql 192 | S2_CELL s2_cell_edge_neighbor(cell S2_CELL, index TINYINT) 193 | ``` 194 | 195 | 196 | ### s2_cell_from_token 197 | 198 | 199 | 200 | ```sql 201 | S2_CELL s2_cell_from_token(text VARCHAR) 202 | ``` 203 | 204 | 205 | ### s2_cell_intersects 206 | 207 | 208 | 209 | ```sql 210 | BOOLEAN s2_cell_intersects(cell1 S2_CELL, cell2 S2_CELL) 211 | ``` 212 | 213 | 214 | ### s2_cell_level 215 | 216 | 217 | 218 | ```sql 219 | TINYINT s2_cell_level(cell S2_CELL) 220 | ``` 221 | 222 | 223 | ### s2_cell_parent 224 | 225 | 226 | 227 | ```sql 228 | S2_CELL s2_cell_parent(cell S2_CELL, index TINYINT) 229 | ``` 230 | 231 | 232 | ### s2_cell_range_max 233 | 234 | 235 | 236 | ```sql 237 | S2_CELL s2_cell_range_max(cell S2_CELL) 238 | ``` 239 | 240 | 241 | ### s2_cell_range_min 242 | 243 | 244 | 245 | ```sql 246 | S2_CELL s2_cell_range_min(cell S2_CELL) 247 | ``` 248 | 249 | 250 | ### s2_cell_token 251 | 252 | 253 | 254 | ```sql 255 | VARCHAR s2_cell_token(cell S2_CELL) 256 | ``` 257 | 258 | 259 | ### s2_cell_vertex 260 | 261 | Returns the vertex of the S2 cell. 262 | 263 | ```sql 264 | GEOGRAPHY s2_cell_vertex(cell_id S2_CELL, vertex_id TINYINT) 265 | ``` 266 | 267 | 268 | ### s2_cellfromlonlat 269 | 270 | Convert a lon/lat pair to S2_CELL_CENTER 271 | 272 | ```sql 273 | S2_CELL_CENTER s2_cellfromlonlat(lon DOUBLE, lat DOUBLE) 274 | ``` 275 | 276 | 277 | ### s2_cellfromwkb 278 | 279 | Convert a WKB point directly to S2_CELL_CENTER 280 | 281 | ```sql 282 | S2_CELL_CENTER s2_cellfromwkb(wkb BLOB) 283 | ``` 284 | 285 | ## Conversion 286 | 287 | 288 | ### s2_astext 289 | 290 | Returns the WKT string of the geography. 291 | 292 | ```sql 293 | VARCHAR s2_astext(geog GEOGRAPHY) 294 | ``` 295 | 296 | 297 | ### s2_aswkb 298 | 299 | Returns the WKB blob of the geography. 300 | 301 | ```sql 302 | BLOB s2_aswkb(geog GEOGRAPHY) 303 | ``` 304 | 305 | 306 | ### s2_format 307 | 308 | Returns the WKT string of the geography with a given precision. 309 | 310 | ```sql 311 | VARCHAR s2_format(geog GEOGRAPHY, precision TINYINT) 312 | ``` 313 | 314 | 315 | ### s2_geogfromtext 316 | 317 | Returns the geography from a WKT string. 318 | 319 | ```sql 320 | GEOGRAPHY s2_geogfromtext(wkt VARCHAR) 321 | ``` 322 | 323 | 324 | ### s2_geogfromwkb 325 | 326 | Converts a WKB blob to a geography. 327 | 328 | ```sql 329 | GEOGRAPHY s2_geogfromwkb(wkb BLOB) 330 | ``` 331 | 332 | 333 | ### s2_prepare 334 | 335 | Prepares a geography for faster predicate and overlay operations. 336 | 337 | ```sql 338 | GEOGRAPHY s2_prepare(geog GEOGRAPHY) 339 | ``` 340 | 341 | ## Data 342 | 343 | 344 | ### s2_data_city 345 | 346 | 347 | 348 | ```sql 349 | GEOGRAPHY s2_data_city(name VARCHAR) 350 | ``` 351 | 352 | 353 | ### s2_data_country 354 | 355 | 356 | 357 | ```sql 358 | GEOGRAPHY s2_data_country(name VARCHAR) 359 | ``` 360 | 361 | ## Overlay 362 | 363 | 364 | ### s2_difference 365 | 366 | Returns the difference of two geographies. 367 | 368 | ```sql 369 | GEOGRAPHY s2_difference(geog1 GEOGRAPHY, geog2 GEOGRAPHY) 370 | ``` 371 | 372 | 373 | ### s2_intersection 374 | 375 | Returns the intersection of two geographies. 
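For example, intersecting two overlapping polygons, mirroring the example registered for this function in `src/s2_binary_index_ops.cpp`:

```sql
SELECT s2_intersection(
  'POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))',
  'POLYGON ((5 5, 15 5, 15 15, 5 15, 5 5))'
) AS intersection;
```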
376 | 377 | ```sql 378 | GEOGRAPHY s2_intersection(geog1 GEOGRAPHY, geog2 GEOGRAPHY) 379 | ``` 380 | 381 | 382 | ### s2_union 383 | 384 | Returns the union of two geographies. 385 | 386 | ```sql 387 | GEOGRAPHY s2_union(geog1 GEOGRAPHY, geog2 GEOGRAPHY) 388 | ``` 389 | 390 | ## Predicate 391 | 392 | 393 | ### s2_contains 394 | 395 | Returns true if the first geography contains the second. 396 | 397 | ```sql 398 | BOOLEAN s2_contains(geog1 GEOGRAPHY, geog2 GEOGRAPHY) 399 | ``` 400 | 401 | 402 | ### s2_equals 403 | 404 | Returns true if the two geographies are equal. 405 | 406 | ```sql 407 | BOOLEAN s2_equals(geog1 GEOGRAPHY, geog2 GEOGRAPHY) 408 | ``` 409 | 410 | 411 | ### s2_intersects 412 | 413 | Returns true if the two geographies intersect. 414 | 415 | ```sql 416 | BOOLEAN s2_intersects(geog1 GEOGRAPHY, geog2 GEOGRAPHY) 417 | ``` 418 | 419 | 420 | ### s2_mayintersect 421 | 422 | Returns true if the two geographies may intersect. 423 | 424 | ```sql 425 | BOOLEAN s2_mayintersect(geog1 GEOGRAPHY, geog2 GEOGRAPHY) 426 | ``` 427 | -------------------------------------------------------------------------------- /scripts/extension-upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Extension upload script 4 | 5 | # Usage: ./extension-upload.sh 6 | # : Name of the extension 7 | # : Version (commit / version tag) of the extension 8 | # : Version (commit / version tag) of DuckDB 9 | # : Architecture target of the extension binary 10 | # : S3 bucket to upload to 11 | # : Set this as the latest version ("true" / "false", default: "false") 12 | # : Set this as a versioned version that will prevent its deletion 13 | 14 | set -e 15 | 16 | if [[ $4 == wasm* ]]; then 17 | ext="/tmp/extension/$1.duckdb_extension.wasm" 18 | else 19 | ext="/tmp/extension/$1.duckdb_extension" 20 | fi 21 | 22 | echo $ext 23 | 24 | script_dir="$(dirname "$(readlink -f "$0")")" 25 | 26 | # calculate SHA256 hash of extension binary 27 | cat $ext > $ext.append 28 | 29 | if [[ $4 == wasm* ]]; then 30 | # 0 for custom section 31 | # 113 in hex = 275 in decimal, total lenght of what follows (1 + 16 + 2 + 256) 32 | # [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02] 33 | echo -n -e '\x00' >> $ext.append 34 | echo -n -e '\x93\x02' >> $ext.append 35 | # 10 in hex = 16 in decimal, lenght of name, 1 byte 36 | echo -n -e '\x10' >> $ext.append 37 | echo -n -e 'duckdb_signature' >> $ext.append 38 | # the name of the WebAssembly custom section, 16 bytes 39 | # 100 in hex, 256 in decimal 40 | # [1(continuation) + 0000000(payload) = ff, 0(continuation) + 10(payload)], 41 | # for a grand total of 2 bytes 42 | echo -n -e '\x80\x02' >> $ext.append 43 | fi 44 | 45 | # (Optionally) Sign binary 46 | if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then 47 | echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem 48 | $script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash 49 | openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign 50 | rm -f private.pem 51 | fi 52 | 53 | # Signature is always there, potentially defaulting to 256 zeros 54 | truncate -s 256 $ext.sign 55 | 56 | # append signature to extension binary 57 | cat $ext.sign >> $ext.append 58 | 59 | # compress extension binary 60 | if [[ $4 == wasm_* ]]; then 61 | brotli < $ext.append > "$ext.compressed" 62 | else 63 | gzip < $ext.append > "$ext.compressed" 64 | fi 65 | 66 | set -e 67 | 68 | # Abort if AWS key is not set 69 | if [ -z 
"$AWS_ACCESS_KEY_ID" ]; then 70 | echo "No AWS key found, skipping.." 71 | exit 0 72 | fi 73 | 74 | # upload versioned version 75 | if [[ $7 = 'true' ]]; then 76 | if [[ $4 == wasm* ]]; then 77 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 78 | else 79 | aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read 80 | fi 81 | fi 82 | 83 | # upload to latest version 84 | if [[ $6 = 'true' ]]; then 85 | if [[ $4 == wasm* ]]; then 86 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" 87 | else 88 | aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read 89 | fi 90 | fi 91 | -------------------------------------------------------------------------------- /scripts/setup-custom-toolchain.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is an example script that can be used to install additional toolchain dependencies. Feel free to remove this script 4 | # if no additional toolchains are required 5 | 6 | # To enable this script, set the `custom_toolchain_script` option to true when calling the reusable workflow 7 | # `.github/workflows/_extension_distribution.yml` from `https://github.com/duckdb/extension-ci-tools` 8 | 9 | # note that the $DUCKDB_PLATFORM environment variable can be used to discern between the platforms 10 | echo "This is the sample custom toolchain script running for architecture '$DUCKDB_PLATFORM' for the s2 extension." 11 | -------------------------------------------------------------------------------- /src/function_builder.cpp: -------------------------------------------------------------------------------- 1 | #include "function_builder.hpp" 2 | #include "duckdb/catalog/catalog_entry/function_entry.hpp" 3 | #include "duckdb/main/extension_util.hpp" 4 | 5 | namespace duckdb { 6 | 7 | void FunctionBuilder::Register(DatabaseInstance& db, const char* name, 8 | ScalarFunctionBuilder& builder) { 9 | // Register the function 10 | ExtensionUtil::RegisterFunction(db, std::move(builder.set)); 11 | 12 | // Also add the parameter names. We need to access the catalog entry for this. 
13 | auto& catalog = Catalog::GetSystemCatalog(db); 14 | auto transaction = CatalogTransaction::GetSystemTransaction(db); 15 | auto& schema = catalog.GetSchema(transaction, DEFAULT_SCHEMA); 16 | auto catalog_entry = 17 | schema.GetEntry(transaction, CatalogType::SCALAR_FUNCTION_ENTRY, name); 18 | if (!catalog_entry) { 19 | // This should not happen, we just registered the function 20 | throw InternalException( 21 | "Function with name \"%s\" not found in FunctionBuilder::AddScalar", name); 22 | } 23 | 24 | auto& func_entry = catalog_entry->Cast(); 25 | 26 | #if defined(DUCKDB_FUNC_ENTRY_HAS_METADATA) 27 | if (!builder.parameter_names.empty()) { 28 | func_entry.parameter_names = std::move(builder.parameter_names); 29 | } 30 | 31 | if (!builder.description.empty()) { 32 | func_entry.description = std::move(builder.description); 33 | } 34 | 35 | if (!builder.example.empty()) { 36 | func_entry.example = std::move(builder.example); 37 | } 38 | #endif 39 | if (!builder.tags.empty()) { 40 | func_entry.tags = std::move(builder.tags); 41 | } 42 | } 43 | 44 | } // namespace duckdb 45 | -------------------------------------------------------------------------------- /src/geography_extension.cpp: -------------------------------------------------------------------------------- 1 | #define DUCKDB_EXTENSION_MAIN 2 | 3 | #include "geography_extension.hpp" 4 | #include 5 | #include "duckdb.hpp" 6 | #include "duckdb/common/exception.hpp" 7 | #include "duckdb/common/string_util.hpp" 8 | #include "duckdb/function/scalar_function.hpp" 9 | #include "duckdb/main/extension_util.hpp" 10 | 11 | #include "s2_cell_ops.hpp" 12 | #include "s2_data.hpp" 13 | #include "s2_dependencies.hpp" 14 | #include "s2_geography_ops.hpp" 15 | #include "s2_types.hpp" 16 | 17 | namespace duckdb { 18 | 19 | inline void S2ScalarFun(DataChunk& args, ExpressionState& state, Vector& result) { 20 | result.SetVectorType(VectorType::CONSTANT_VECTOR); 21 | result.SetValue(0, "s2"); 22 | } 23 | 24 | static void LoadInternal(DatabaseInstance& instance) { 25 | // Register a scalar function 26 | auto s2_scalar_function = ScalarFunction("s2", {}, LogicalType::VARCHAR, S2ScalarFun); 27 | ExtensionUtil::RegisterFunction(instance, s2_scalar_function); 28 | 29 | duckdb_s2::RegisterTypes(instance); 30 | duckdb_s2::RegisterS2Dependencies(instance); 31 | duckdb_s2::RegisterS2CellOps(instance); 32 | duckdb_s2::RegisterS2GeographyOps(instance); 33 | duckdb_s2::RegisterS2Data(instance); 34 | } 35 | 36 | void GeographyExtension::Load(DuckDB& db) { LoadInternal(*db.instance); } 37 | std::string GeographyExtension::Name() { return "geography"; } 38 | 39 | std::string GeographyExtension::Version() const { 40 | #ifdef EXT_VERSION_GEOGRAPHY 41 | return EXT_VERSION_GEOGRAPHY; 42 | #else 43 | return ""; 44 | #endif 45 | } 46 | 47 | } // namespace duckdb 48 | 49 | extern "C" { 50 | 51 | DUCKDB_EXTENSION_API void geography_init(duckdb::DatabaseInstance& db) { 52 | duckdb::DuckDB db_wrapper(db); 53 | db_wrapper.LoadExtension(); 54 | } 55 | 56 | DUCKDB_EXTENSION_API const char* geography_version() { 57 | return duckdb::DuckDB::LibraryVersion(); 58 | } 59 | } 60 | 61 | #ifndef DUCKDB_EXTENSION_MAIN 62 | #error DUCKDB_EXTENSION_MAIN not defined 63 | #endif 64 | -------------------------------------------------------------------------------- /src/include/function_builder.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | #include "duckdb/function/function_set.hpp" 6 | #include 
"duckdb/function/scalar_function.hpp" 7 | 8 | namespace duckdb { 9 | 10 | //------------------------------------------------------------------------------ 11 | // Scalar Function Variant Builder 12 | //------------------------------------------------------------------------------ 13 | 14 | class ScalarFunctionVariantBuilder { 15 | friend class ScalarFunctionBuilder; 16 | 17 | public: 18 | void AddParameter(const char* name, LogicalType type); 19 | void SetReturnType(LogicalType type); 20 | void SetFunction(scalar_function_t fn); 21 | 22 | private: 23 | explicit ScalarFunctionVariantBuilder() 24 | : function({}, LogicalTypeId::INVALID, nullptr) {} 25 | 26 | ScalarFunction function; 27 | 28 | vector parameter_names = {}; 29 | }; 30 | 31 | inline void ScalarFunctionVariantBuilder::AddParameter(const char* name, 32 | LogicalType type) { 33 | function.arguments.emplace_back(std::move(type)); 34 | parameter_names.emplace_back(name); 35 | } 36 | 37 | inline void ScalarFunctionVariantBuilder::SetReturnType(LogicalType type) { 38 | function.return_type = std::move(type); 39 | } 40 | 41 | inline void ScalarFunctionVariantBuilder::SetFunction(scalar_function_t fn) { 42 | function.function = fn; 43 | } 44 | 45 | //------------------------------------------------------------------------------ 46 | // Scalar Function Builder 47 | //------------------------------------------------------------------------------ 48 | 49 | class ScalarFunctionBuilder { 50 | friend class FunctionBuilder; 51 | 52 | public: 53 | template 54 | void AddVariant(CALLBACK&& callback); 55 | void SetDescription(const string& desc); 56 | void SetExample(const string& ex); 57 | void SetTag(const string& key, const string& value); 58 | 59 | private: 60 | explicit ScalarFunctionBuilder(const char* name) : set(name) {} 61 | 62 | ScalarFunctionSet set; 63 | 64 | vector parameter_names; 65 | string description; 66 | string example; 67 | unordered_map tags = {}; 68 | }; 69 | 70 | inline void ScalarFunctionBuilder::SetDescription(const string& desc) { 71 | description = desc; 72 | } 73 | 74 | inline void ScalarFunctionBuilder::SetExample(const string& ex) { example = ex; } 75 | 76 | inline void ScalarFunctionBuilder::SetTag(const string& key, const string& value) { 77 | tags[key] = value; 78 | } 79 | 80 | template 81 | void ScalarFunctionBuilder::AddVariant(CALLBACK&& callback) { 82 | ScalarFunctionVariantBuilder builder; 83 | 84 | callback(builder); 85 | 86 | // A return type is required 87 | if (builder.function.return_type.id() == LogicalTypeId::INVALID) { 88 | throw InternalException("Return type not set in ScalarFunctionBuilder::AddVariant"); 89 | } 90 | 91 | // Add the new variant to the set 92 | set.AddFunction(std::move(builder.function)); 93 | 94 | // DuckDB does not support naming individual parameters differently between overloads, 95 | // there is only a single list of parameter names for the entire function. 96 | // Therefore, our only option right now is to append the new parameter names to the 97 | // list. This is going to change in DuckDB 1.2 where overloads will be able to have 98 | // different parameter names. 
99 | 100 | // Add any new parameter names to the list 101 | const auto& old_params = parameter_names; 102 | const auto& new_params = builder.parameter_names; 103 | 104 | for (idx_t offset = old_params.size(); offset < new_params.size(); offset++) { 105 | parameter_names.emplace_back(builder.parameter_names[offset]); 106 | } 107 | } 108 | 109 | //------------------------------------------------------------------------------ 110 | // Function Builder 111 | //------------------------------------------------------------------------------ 112 | 113 | class FunctionBuilder { 114 | public: 115 | template 116 | static void RegisterScalar(DatabaseInstance& db, const char* name, CALLBACK&& callback); 117 | 118 | private: 119 | static void Register(DatabaseInstance& db, const char* name, 120 | ScalarFunctionBuilder& builder); 121 | }; 122 | 123 | template 124 | void FunctionBuilder::RegisterScalar(DatabaseInstance& db, const char* name, 125 | CALLBACK&& callback) { 126 | ScalarFunctionBuilder builder(name); 127 | callback(builder); 128 | 129 | Register(db, name, builder); 130 | } 131 | 132 | } // namespace duckdb 133 | -------------------------------------------------------------------------------- /src/include/geography_extension.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb.hpp" 4 | 5 | namespace duckdb { 6 | 7 | class GeographyExtension : public Extension { 8 | public: 9 | void Load(DuckDB& db) override; 10 | std::string Name() override; 11 | std::string Version() const override; 12 | }; 13 | 14 | } // namespace duckdb 15 | -------------------------------------------------------------------------------- /src/include/global_options.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "s2geography/build.h" 4 | 5 | namespace duckdb { 6 | 7 | namespace duckdb_s2 { 8 | 9 | // S2 Provides a number of options. This is where default options get set, 10 | // which should perhaps be configurable from a session. 
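// With the CLOSED models, polygon boundaries and polyline endpoints are treated as part
// of the geography, so inputs that merely touch at an edge or endpoint count as
// intersecting. (Interpretation of S2's model options; see the S2BooleanOperation docs.)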
11 | inline void InitBooleanOperationOptions(S2BooleanOperation::Options* options) { 12 | options->set_polygon_model(S2BooleanOperation::PolygonModel::CLOSED); 13 | options->set_polyline_model(S2BooleanOperation::PolylineModel::CLOSED); 14 | } 15 | 16 | inline void InitGlobalOptions(s2geography::GlobalOptions* options) { 17 | InitBooleanOperationOptions(&options->boolean_operation); 18 | } 19 | 20 | } // namespace duckdb_s2 21 | } // namespace duckdb 22 | -------------------------------------------------------------------------------- /src/include/s2_cell_ops.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/main/database.hpp" 4 | 5 | namespace duckdb { 6 | 7 | namespace duckdb_s2 { 8 | 9 | void RegisterS2CellOps(DatabaseInstance& instance); 10 | 11 | } 12 | } // namespace duckdb 13 | -------------------------------------------------------------------------------- /src/include/s2_data.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/main/database.hpp" 4 | 5 | namespace duckdb { 6 | 7 | namespace duckdb_s2 { 8 | 9 | void RegisterS2Data(DatabaseInstance& instance); 10 | 11 | } 12 | } // namespace duckdb 13 | -------------------------------------------------------------------------------- /src/include/s2_dependencies.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/main/database.hpp" 4 | 5 | namespace duckdb { 6 | 7 | namespace duckdb_s2 { 8 | 9 | void RegisterS2Dependencies(DatabaseInstance& instance); 10 | 11 | } 12 | } // namespace duckdb 13 | -------------------------------------------------------------------------------- /src/include/s2_functions_io.hpp: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include "duckdb/common/types.hpp" 5 | #include "duckdb/main/database.hpp" 6 | 7 | #include "s2geography/geoarrow.h" 8 | 9 | namespace duckdb { 10 | namespace duckdb_s2 { 11 | void ImportWKBToGeography(Vector& source, Vector& result, idx_t count, 12 | const s2geography::geoarrow::ImportOptions& options = 13 | s2geography::geoarrow::ImportOptions()); 14 | 15 | void ExportGeographyToWKB(Vector& source, Vector& result, idx_t count, 16 | const s2geography::geoarrow::ExportOptions& options = 17 | s2geography::geoarrow::ExportOptions()); 18 | 19 | void RegisterS2GeographyFunctionsIO(DatabaseInstance& instance); 20 | } // namespace duckdb_s2 21 | } // namespace duckdb 22 | -------------------------------------------------------------------------------- /src/include/s2_geography_ops.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/main/database.hpp" 4 | 5 | #include "s2_functions_io.hpp" 6 | 7 | namespace duckdb { 8 | 9 | namespace duckdb_s2 { 10 | 11 | void RegisterS2GeographyPredicates(DatabaseInstance& instance); 12 | void RegisterS2GeographyAccessors(DatabaseInstance& instance); 13 | void RegisterS2GeographyBounds(DatabaseInstance& instance); 14 | void RegisterGeoArrowExtensions(DatabaseInstance& instance); 15 | 16 | inline void RegisterS2GeographyOps(DatabaseInstance& instance) { 17 | RegisterS2GeographyFunctionsIO(instance); 18 | RegisterS2GeographyPredicates(instance); 19 | RegisterS2GeographyAccessors(instance); 20 | RegisterS2GeographyBounds(instance); 21 | RegisterGeoArrowExtensions(instance); 22 | } 23 | 24 | } // 
namespace duckdb_s2 25 | } // namespace duckdb 26 | -------------------------------------------------------------------------------- /src/include/s2_geography_serde.hpp: -------------------------------------------------------------------------------- 1 | 2 | #include "duckdb.hpp" 3 | 4 | #include "s2geography/geography.h" 5 | 6 | namespace duckdb { 7 | 8 | namespace duckdb_s2 { 9 | 10 | class GeographyDecoder { 11 | public: 12 | s2geography::EncodeTag tag{}; 13 | std::vector covering{}; 14 | 15 | GeographyDecoder() = default; 16 | 17 | void DecodeTag(string_t data) { 18 | decoder_.reset(data.GetPrefix(), 4); 19 | tag.Decode(&decoder_); 20 | } 21 | 22 | void DecodeTagAndCovering(string_t data) { 23 | decoder_.reset(data.GetData(), data.GetSize()); 24 | covering.clear(); 25 | tag.Decode(&decoder_); 26 | tag.DecodeCovering(&decoder_, &covering); 27 | } 28 | 29 | std::unique_ptr Decode(string_t data) { 30 | decoder_.reset(data.GetData(), data.GetSize()); 31 | return s2geography::Geography::DecodeTagged(&decoder_); 32 | } 33 | 34 | private: 35 | Decoder decoder_{}; 36 | }; 37 | 38 | class GeographyEncoder { 39 | public: 40 | GeographyEncoder() { 41 | options_.set_coding_hint(s2coding::CodingHint::COMPACT); 42 | options_.set_enable_lazy_decode(true); 43 | options_.set_include_covering(true); 44 | } 45 | 46 | string_t Encode(const s2geography::Geography& geog) { 47 | encoder_.Resize(0); 48 | geog.EncodeTagged(&encoder_, options_); 49 | return string_t{encoder_.base(), static_cast(encoder_.length())}; 50 | } 51 | 52 | private: 53 | Encoder encoder_{}; 54 | s2geography::EncodeOptions options_{}; 55 | }; 56 | 57 | } // namespace duckdb_s2 58 | 59 | } // namespace duckdb 60 | -------------------------------------------------------------------------------- /src/include/s2_types.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "duckdb/common/types.hpp" 4 | #include "duckdb/main/database.hpp" 5 | 6 | namespace duckdb { 7 | 8 | namespace duckdb_s2 { 9 | 10 | struct Types { 11 | static LogicalType S2_CELL(); 12 | static LogicalType S2_CELL_UNION(); 13 | static LogicalType S2_CELL_CENTER(); 14 | static LogicalType GEOGRAPHY(); 15 | static LogicalType S2_BOX(); 16 | }; 17 | 18 | void RegisterTypes(DatabaseInstance& instance); 19 | 20 | } // namespace duckdb_s2 21 | } // namespace duckdb 22 | -------------------------------------------------------------------------------- /src/s2_accessors.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "duckdb/main/database.hpp" 3 | #include "duckdb/main/extension_util.hpp" 4 | 5 | #include "function_builder.hpp" 6 | 7 | #include "s2/s2earth.h" 8 | #include "s2geography/accessors.h" 9 | 10 | #include "s2/s2cell_union.h" 11 | #include "s2_geography_serde.hpp" 12 | #include "s2_types.hpp" 13 | 14 | namespace duckdb { 15 | 16 | namespace duckdb_s2 { 17 | 18 | namespace { 19 | 20 | struct S2IsEmpty { 21 | static void Register(DatabaseInstance& instance) { 22 | FunctionBuilder::RegisterScalar( 23 | instance, "s2_isempty", [](ScalarFunctionBuilder& func) { 24 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 25 | variant.AddParameter("geog", Types::GEOGRAPHY()); 26 | variant.SetReturnType(LogicalType::BOOLEAN); 27 | variant.SetFunction(ExecuteFn); 28 | }); 29 | 30 | func.SetDescription("Returns true if the geography is empty."); 31 | func.SetExample("SELECT s2_isempty('POINT(0 0)') AS is_empty;"); 32 | 33 | func.SetTag("ext", "geography"); 34 | 
func.SetTag("category", "accessors"); 35 | }); 36 | } 37 | 38 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 39 | Execute(args.data[0], result, args.size()); 40 | } 41 | 42 | static void Execute(Vector& source, Vector& result, idx_t count) { 43 | GeographyDecoder decoder; 44 | 45 | UnaryExecutor::Execute(source, result, count, [&](string_t geog_str) { 46 | decoder.DecodeTag(geog_str); 47 | return decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty; 48 | }); 49 | } 50 | }; 51 | 52 | struct S2IsValid { 53 | static void Register(DatabaseInstance& instance) { 54 | FunctionBuilder::RegisterScalar( 55 | instance, "s2_is_valid", [](ScalarFunctionBuilder& func) { 56 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 57 | variant.AddParameter("geog", Types::GEOGRAPHY()); 58 | variant.SetReturnType(LogicalType::BOOLEAN); 59 | variant.SetFunction(ExecuteFn); 60 | }); 61 | 62 | func.SetDescription(R"( 63 | Returns true if the geography is valid. 64 | 65 | The most common reasons for invalid geographies are repeated points, 66 | an inadequate number of points, and/or crossing edges. 67 | )"); 68 | func.SetExample(R"( 69 | SELECT s2_is_valid(s2_geogfromtext_novalidate('LINESTRING (0 0, 1 1)')) AS valid; 70 | ---- 71 | SELECT s2_is_valid(s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)')) AS valid; 72 | )"); 73 | 74 | func.SetTag("ext", "geography"); 75 | func.SetTag("category", "accessors"); 76 | }); 77 | } 78 | 79 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 80 | Execute(args.data[0], result, args.size()); 81 | } 82 | 83 | static void Execute(Vector& source, Vector& result, idx_t count) { 84 | GeographyDecoder decoder; 85 | S2Error error; 86 | 87 | UnaryExecutor::Execute(source, result, count, [&](string_t geog_str) { 88 | decoder.DecodeTag(geog_str); 89 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 90 | return true; 91 | } else if (decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER) { 92 | return true; 93 | } 94 | 95 | auto geog = decoder.Decode(geog_str); 96 | return !s2geography::s2_find_validation_error(*geog, &error); 97 | }); 98 | } 99 | }; 100 | 101 | struct S2IsValidReason { 102 | static void Register(DatabaseInstance& instance) { 103 | FunctionBuilder::RegisterScalar( 104 | instance, "s2_is_valid_reason", [](ScalarFunctionBuilder& func) { 105 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 106 | variant.AddParameter("geog", Types::GEOGRAPHY()); 107 | variant.SetReturnType(LogicalType::VARCHAR); 108 | variant.SetFunction(ExecuteFn); 109 | }); 110 | 111 | func.SetDescription(R"( 112 | Returns the error string for invalid geographies or the empty string ("") otherwise. 
113 | )"); 114 | func.SetExample(R"( 115 | SELECT s2_is_valid_reason(s2_geogfromtext_novalidate('LINESTRING (0 0, 1 1)')) AS valid; 116 | ---- 117 | SELECT s2_is_valid_reason(s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)')) AS valid; 118 | )"); 119 | 120 | func.SetTag("ext", "geography"); 121 | func.SetTag("category", "accessors"); 122 | }); 123 | } 124 | 125 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 126 | Execute(args.data[0], result, args.size()); 127 | } 128 | 129 | static void Execute(Vector& source, Vector& result, idx_t count) { 130 | GeographyDecoder decoder; 131 | S2Error error; 132 | 133 | UnaryExecutor::Execute( 134 | source, result, count, [&](string_t geog_str) { 135 | decoder.DecodeTag(geog_str); 136 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 137 | return string_t{""}; 138 | } else if (decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER) { 139 | return string_t{""}; 140 | } 141 | 142 | auto geog = decoder.Decode(geog_str); 143 | error.Clear(); 144 | if (!s2geography::s2_find_validation_error(*geog, &error)) { 145 | return string_t{""}; 146 | } else { 147 | return StringVector::AddString(result, error.text()); 148 | } 149 | }); 150 | } 151 | }; 152 | 153 | struct S2Area { 154 | static void Register(DatabaseInstance& instance) { 155 | FunctionBuilder::RegisterScalar(instance, "s2_area", [](ScalarFunctionBuilder& func) { 156 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 157 | variant.AddParameter("geog", Types::GEOGRAPHY()); 158 | variant.SetReturnType(LogicalType::DOUBLE); 159 | variant.SetFunction(ExecuteFn); 160 | }); 161 | 162 | func.SetDescription(R"( 163 | Calculate the area of the geography in square meters. 164 | 165 | The returned area is in square meters as approximated as the area of the polygon 166 | on a perfect sphere. 167 | 168 | For non-polygon geographies, `s2_area()` returns `0.0`. 
169 | )"); 170 | func.SetExample(R"( 171 | SELECT s2_area(s2_data_country('Fiji')) AS area; 172 | ---- 173 | SELECT s2_area('POINT (0 0)'::GEOGRAPHY) AS area; 174 | )"); 175 | 176 | func.SetTag("ext", "geography"); 177 | func.SetTag("category", "accessors"); 178 | }); 179 | } 180 | 181 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 182 | Execute(args.data[0], result, args.size()); 183 | } 184 | 185 | static void Execute(Vector& source, Vector& result, idx_t count) { 186 | GeographyDecoder decoder; 187 | 188 | UnaryExecutor::Execute( 189 | source, result, count, [&](string_t geog_str) { 190 | decoder.DecodeTag(geog_str); 191 | 192 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 193 | return 0.0; 194 | } 195 | 196 | switch (decoder.tag.kind) { 197 | case s2geography::GeographyKind::CELL_CENTER: 198 | case s2geography::GeographyKind::POINT: 199 | case s2geography::GeographyKind::POLYLINE: 200 | return 0.0; 201 | default: { 202 | auto geog = decoder.Decode(geog_str); 203 | return s2geography::s2_area(*geog) * S2Earth::RadiusMeters() * 204 | S2Earth::RadiusMeters(); 205 | } 206 | } 207 | }); 208 | } 209 | }; 210 | 211 | struct S2Perimieter { 212 | static void Register(DatabaseInstance& instance) { 213 | FunctionBuilder::RegisterScalar( 214 | instance, "s2_perimeter", [](ScalarFunctionBuilder& func) { 215 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 216 | variant.AddParameter("geog", Types::GEOGRAPHY()); 217 | variant.SetReturnType(LogicalType::DOUBLE); 218 | variant.SetFunction(ExecuteFn); 219 | }); 220 | 221 | func.SetDescription(R"( 222 | Calculate the perimeter of the geography in meters. 223 | 224 | The returned length is in meters as approximated as the perimeter of the polygon 225 | on a perfect sphere. 226 | 227 | For non-polygon geographies, `s2_perimeter()` returns `0.0`. For a polygon with 228 | more than one ring, this function returns the sum of the perimeter of all 229 | rings. 
230 | )"); 231 | func.SetExample(R"( 232 | SELECT s2_perimeter(s2_data_country('Fiji')) AS perimeter; 233 | ---- 234 | SELECT s2_perimeter('POINT (0 0)'::GEOGRAPHY) AS perimeter; 235 | )"); 236 | 237 | func.SetTag("ext", "geography"); 238 | func.SetTag("category", "accessors"); 239 | }); 240 | } 241 | 242 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 243 | Execute(args.data[0], result, args.size()); 244 | } 245 | 246 | static void Execute(Vector& source, Vector& result, idx_t count) { 247 | GeographyDecoder decoder; 248 | 249 | UnaryExecutor::Execute( 250 | source, result, count, [&](string_t geog_str) { 251 | decoder.DecodeTag(geog_str); 252 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 253 | return 0.0; 254 | } 255 | 256 | switch (decoder.tag.kind) { 257 | case s2geography::GeographyKind::CELL_CENTER: 258 | case s2geography::GeographyKind::POINT: 259 | case s2geography::GeographyKind::POLYLINE: 260 | return 0.0; 261 | default: { 262 | auto geog = decoder.Decode(geog_str); 263 | return s2geography::s2_perimeter(*geog) * S2Earth::RadiusMeters(); 264 | } 265 | } 266 | }); 267 | } 268 | }; 269 | 270 | struct S2Length { 271 | static void Register(DatabaseInstance& instance) { 272 | FunctionBuilder::RegisterScalar( 273 | instance, "s2_length", [](ScalarFunctionBuilder& func) { 274 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 275 | variant.AddParameter("geog", Types::GEOGRAPHY()); 276 | variant.SetReturnType(LogicalType::DOUBLE); 277 | variant.SetFunction(ExecuteFn); 278 | }); 279 | 280 | func.SetDescription(R"( 281 | Calculate the length of the geography in meters. 282 | 283 | For non-linestring or multilinestring geographies, `s2_length()` returns `0.0`. 284 | )"); 285 | func.SetExample(R"( 286 | SELECT s2_length('POINT (0 0)'::GEOGRAPHY) AS length; 287 | ---- 288 | SELECT s2_length('LINESTRING (0 0, -64 45)'::GEOGRAPHY) AS length; 289 | ---- 290 | SELECT s2_length(s2_data_country('Canada')) AS length; 291 | )"); 292 | 293 | func.SetTag("ext", "geography"); 294 | func.SetTag("category", "accessors"); 295 | }); 296 | } 297 | 298 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 299 | Execute(args.data[0], result, args.size()); 300 | } 301 | 302 | static void Execute(Vector& source, Vector& result, idx_t count) { 303 | GeographyDecoder decoder; 304 | 305 | UnaryExecutor::Execute( 306 | source, result, count, [&](string_t geog_str) { 307 | decoder.DecodeTag(geog_str); 308 | 309 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 310 | return 0.0; 311 | } 312 | 313 | switch (decoder.tag.kind) { 314 | case s2geography::GeographyKind::CELL_CENTER: 315 | case s2geography::GeographyKind::POINT: 316 | case s2geography::GeographyKind::POLYGON: 317 | return 0.0; 318 | default: { 319 | auto geog = decoder.Decode(geog_str); 320 | return s2geography::s2_length(*geog) * S2Earth::RadiusMeters(); 321 | } 322 | } 323 | }); 324 | } 325 | }; 326 | 327 | struct S2XY { 328 | static void Register(DatabaseInstance& instance) { 329 | FunctionBuilder::RegisterScalar(instance, "s2_x", [](ScalarFunctionBuilder& func) { 330 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 331 | variant.AddParameter("geog", Types::GEOGRAPHY()); 332 | variant.SetReturnType(LogicalType::DOUBLE); 333 | variant.SetFunction(ExecuteFnX); 334 | }); 335 | 336 | func.SetDescription(R"( 337 | Extract the longitude of a point geography. 
338 | 339 | For geographies that are not a single point, `NaN` is returned. 340 | )"); 341 | 342 | func.SetExample(R"( 343 | SELECT s2_x('POINT (-64 45)'::GEOGRAPHY); 344 | )"); 345 | 346 | func.SetTag("ext", "geography"); 347 | func.SetTag("category", "accessors"); 348 | }); 349 | 350 | FunctionBuilder::RegisterScalar(instance, "s2_y", [](ScalarFunctionBuilder& func) { 351 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 352 | variant.AddParameter("geog", Types::GEOGRAPHY()); 353 | variant.SetReturnType(LogicalType::DOUBLE); 354 | variant.SetFunction(ExecuteFnY); 355 | }); 356 | 357 | func.SetDescription(R"( 358 | Extract the latitude of a point geography. 359 | 360 | For geographies that are not a single point, `NaN` is returned. 361 | )"); 362 | 363 | func.SetExample(R"( 364 | SELECT s2_y('POINT (-64 45)'::GEOGRAPHY); 365 | )"); 366 | 367 | func.SetTag("ext", "geography"); 368 | func.SetTag("category", "accessors"); 369 | }); 370 | } 371 | 372 | static inline void ExecuteFnX(DataChunk& args, ExpressionState& state, Vector& result) { 373 | Execute( 374 | args.data[0], result, args.size(), [](S2LatLng ll) { return ll.lng().degrees(); }, 375 | [](const s2geography::Geography& geog) { return s2_x(geog); }); 376 | } 377 | 378 | static inline void ExecuteFnY(DataChunk& args, ExpressionState& state, Vector& result) { 379 | Execute( 380 | args.data[0], result, args.size(), [](S2LatLng ll) { return ll.lat().degrees(); }, 381 | [](const s2geography::Geography& geog) { return s2_y(geog); }); 382 | } 383 | 384 | template 385 | static void Execute(Vector& source, Vector& result, idx_t count, 386 | HandleLatLng&& handle_latlng, HandleGeog&& handle_geog) { 387 | GeographyDecoder decoder; 388 | 389 | UnaryExecutor::Execute( 390 | source, result, count, [&](string_t geog_str) { 391 | decoder.DecodeTag(geog_str); 392 | 393 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 394 | return static_cast(NAN); 395 | } 396 | 397 | switch (decoder.tag.kind) { 398 | case s2geography::GeographyKind::CELL_CENTER: { 399 | decoder.DecodeTagAndCovering(geog_str); 400 | S2Point center = decoder.covering[0].ToPoint(); 401 | return handle_latlng(S2LatLng(center)); 402 | } 403 | 404 | default: { 405 | auto geog = decoder.Decode(geog_str); 406 | return handle_geog(*geog); 407 | } 408 | } 409 | }); 410 | } 411 | }; 412 | 413 | struct S2Dimension { 414 | static void Register(DatabaseInstance& instance) { 415 | FunctionBuilder::RegisterScalar( 416 | instance, "s2_dimension", [](ScalarFunctionBuilder& func) { 417 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 418 | variant.AddParameter("geog", Types::GEOGRAPHY()); 419 | variant.SetReturnType(LogicalType::INTEGER); 420 | variant.SetFunction(ExecuteFn); 421 | }); 422 | 423 | func.SetDescription(R"( 424 | Calculate the highest dimension element present in the geography. 425 | 426 | Points have a dimension of 0; linestrings have a dimension of 1; polygons have 427 | a dimension of 2. For geography collections, this will return the highest dimension 428 | value of any element in the collection (e.g., a collection containing a point and 429 | a polygon will return 2). An empty geography collection returns a value of -1. 
430 | )"); 431 | func.SetExample(R"( 432 | SELECT s2_dimension('POINT (0 0)'::GEOGRAPHY); 433 | ---- 434 | SELECT s2_dimension('LINESTRING (0 0, 1 1)'::GEOGRAPHY); 435 | ---- 436 | SELECT s2_dimension(s2_data_country('Canada')); 437 | ---- 438 | SELECT s2_dimension('GEOMETRYCOLLECTION EMPTY'); 439 | ---- 440 | SELECT s2_dimension('GEOMETRYCOLLECTION (POINT (0 1), LINESTRING (0 0, 1 1))'::GEOGRAPHY); 441 | )"); 442 | 443 | func.SetTag("ext", "geography"); 444 | func.SetTag("category", "accessors"); 445 | }); 446 | } 447 | 448 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 449 | Execute(args.data[0], result, args.size()); 450 | } 451 | 452 | static void Execute(Vector& source, Vector& result, idx_t count) { 453 | GeographyDecoder decoder; 454 | 455 | UnaryExecutor::Execute( 456 | source, result, count, [&](string_t geog_str) { 457 | decoder.DecodeTag(geog_str); 458 | 459 | switch (decoder.tag.kind) { 460 | case s2geography::GeographyKind::CELL_CENTER: 461 | case s2geography::GeographyKind::POINT: 462 | return 0; 463 | case s2geography::GeographyKind::POLYLINE: 464 | return 1; 465 | case s2geography::GeographyKind::POLYGON: 466 | return 2; 467 | default: { 468 | auto geog = decoder.Decode(geog_str); 469 | return s2geography::s2_dimension(*geog); 470 | } 471 | } 472 | }); 473 | } 474 | }; 475 | 476 | struct S2NumPoints { 477 | static void Register(DatabaseInstance& instance) { 478 | FunctionBuilder::RegisterScalar( 479 | instance, "s2_num_points", [](ScalarFunctionBuilder& func) { 480 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 481 | variant.AddParameter("geog", Types::GEOGRAPHY()); 482 | variant.SetReturnType(LogicalType::INTEGER); 483 | variant.SetFunction(ExecuteFn); 484 | }); 485 | 486 | func.SetDescription(R"( 487 | Extract the number of vertices in the geography. 
488 | )"); 489 | func.SetExample(R"( 490 | SELECT s2_num_points(s2_data_country('Fiji')); 491 | ---- 492 | SELECT s2_num_points(s2_data_country('Canada')); 493 | )"); 494 | 495 | func.SetTag("ext", "geography"); 496 | func.SetTag("category", "accessors"); 497 | }); 498 | } 499 | 500 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 501 | Execute(args.data[0], result, args.size()); 502 | } 503 | 504 | static void Execute(Vector& source, Vector& result, idx_t count) { 505 | GeographyDecoder decoder; 506 | 507 | UnaryExecutor::Execute( 508 | source, result, count, [&](string_t geog_str) { 509 | decoder.DecodeTag(geog_str); 510 | 511 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 512 | return 0; 513 | } 514 | 515 | switch (decoder.tag.kind) { 516 | case s2geography::GeographyKind::CELL_CENTER: 517 | return 1; 518 | default: { 519 | auto geog = decoder.Decode(geog_str); 520 | return s2geography::s2_num_points(*geog); 521 | } 522 | } 523 | }); 524 | } 525 | }; 526 | 527 | } // namespace 528 | 529 | void RegisterS2GeographyAccessors(DatabaseInstance& instance) { 530 | S2IsEmpty::Register(instance); 531 | S2IsValid::Register(instance); 532 | S2IsValidReason::Register(instance); 533 | S2Area::Register(instance); 534 | S2Perimieter::Register(instance); 535 | S2Length::Register(instance); 536 | S2XY::Register(instance); 537 | S2Dimension::Register(instance); 538 | S2NumPoints::Register(instance); 539 | } 540 | 541 | } // namespace duckdb_s2 542 | } // namespace duckdb 543 | -------------------------------------------------------------------------------- /src/s2_binary_index_ops.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "duckdb/main/database.hpp" 3 | #include "duckdb/main/extension_util.hpp" 4 | 5 | #include "s2/s2cell_union.h" 6 | #include "s2/s2closest_edge_query.h" 7 | #include "s2/s2earth.h" 8 | #include "s2/s2furthest_edge_query.h" 9 | #include "s2_geography_serde.hpp" 10 | #include "s2_types.hpp" 11 | 12 | #include "s2geography/build.h" 13 | 14 | #include "function_builder.hpp" 15 | #include "global_options.hpp" 16 | 17 | namespace duckdb { 18 | 19 | namespace duckdb_s2 { 20 | 21 | namespace { 22 | using UniqueGeography = std::unique_ptr; 23 | 24 | // Handle the case where we've already computed the index on one or both 25 | // of the sides in advance 26 | template 27 | static auto DispatchShapeIndexOp(UniqueGeography lhs, UniqueGeography rhs, 28 | ShapeIndexFilter&& filter) { 29 | if (lhs->kind() == s2geography::GeographyKind::ENCODED_SHAPE_INDEX && 30 | rhs->kind() == s2geography::GeographyKind::ENCODED_SHAPE_INDEX) { 31 | auto lhs_index = 32 | reinterpret_cast(lhs.get()); 33 | auto rhs_index = 34 | reinterpret_cast(rhs.get()); 35 | return filter(lhs_index->ShapeIndex(), rhs_index->ShapeIndex()); 36 | } else if (lhs->kind() == s2geography::GeographyKind::ENCODED_SHAPE_INDEX) { 37 | auto lhs_index = 38 | reinterpret_cast(lhs.get()); 39 | s2geography::ShapeIndexGeography rhs_index(*rhs); 40 | return filter(lhs_index->ShapeIndex(), rhs_index.ShapeIndex()); 41 | } else if (rhs->kind() == s2geography::GeographyKind::ENCODED_SHAPE_INDEX) { 42 | s2geography::ShapeIndexGeography lhs_index(*lhs); 43 | auto rhs_index = 44 | reinterpret_cast(rhs.get()); 45 | return filter(lhs_index.ShapeIndex(), rhs_index->ShapeIndex()); 46 | } else { 47 | s2geography::ShapeIndexGeography lhs_index(*lhs); 48 | s2geography::ShapeIndexGeography rhs_index(*rhs); 49 | return filter(lhs_index.ShapeIndex(), 
rhs_index.ShapeIndex()); 50 | } 51 | } 52 | 53 | struct S2BinaryIndexOp { 54 | static void Register(DatabaseInstance& instance) { 55 | FunctionBuilder::RegisterScalar( 56 | instance, "s2_mayintersect", [](ScalarFunctionBuilder& func) { 57 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 58 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 59 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 60 | variant.SetReturnType(LogicalType::BOOLEAN); 61 | variant.SetFunction(ExecuteMayIntersectFn); 62 | }); 63 | 64 | func.SetDescription(R"( 65 | Returns true if the two geographies may intersect. 66 | 67 | This function uses the internal [covering](#s2_covering) stored alongside 68 | each geography to perform a cheap check for potential intersection. 69 | )"); 70 | 71 | func.SetExample(R"( 72 | -- Definitely intersects 73 | SELECT s2_mayintersect(s2_data_country('Canada'), s2_data_city('Toronto')); 74 | ---- 75 | -- Doesn't intersect but might according to the internal coverings 76 | SELECT s2_mayintersect(s2_data_country('Canada'), s2_data_city('Chicago')); 77 | ---- 78 | -- Definitely doesn't intersect 79 | SELECT s2_mayintersect(s2_data_country('Canada'), s2_data_city('Berlin')); 80 | )"); 81 | 82 | func.SetTag("ext", "geography"); 83 | func.SetTag("category", "predicates"); 84 | }); 85 | 86 | FunctionBuilder::RegisterScalar( 87 | instance, "s2_intersects", [](ScalarFunctionBuilder& func) { 88 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 89 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 90 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 91 | variant.SetReturnType(LogicalType::BOOLEAN); 92 | variant.SetFunction(ExecuteIntersectsFn); 93 | }); 94 | 95 | func.SetDescription(R"( 96 | Returns true if the two geographies intersect. 97 | )"); 98 | 99 | func.SetExample(R"( 100 | SELECT s2_intersects(s2_data_country('Canada'), s2_data_city('Toronto')); 101 | ---- 102 | SELECT s2_intersects(s2_data_country('Canada'), s2_data_city('Chicago')); 103 | )"); 104 | 105 | func.SetTag("ext", "geography"); 106 | func.SetTag("category", "predicates"); 107 | }); 108 | 109 | FunctionBuilder::RegisterScalar( 110 | instance, "s2_contains", [](ScalarFunctionBuilder& func) { 111 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 112 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 113 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 114 | variant.SetReturnType(LogicalType::BOOLEAN); 115 | variant.SetFunction(ExecuteContainsFn); 116 | }); 117 | 118 | func.SetDescription(R"( 119 | Returns true if the first geography contains the second. 120 | )"); 121 | 122 | func.SetExample(R"( 123 | SELECT s2_contains(s2_data_country('Canada'), s2_data_city('Toronto')); 124 | ---- 125 | SELECT s2_contains(s2_data_city('Toronto'), s2_data_country('Canada')); 126 | ---- 127 | SELECT s2_contains(s2_data_country('Canada'), s2_data_city('Chicago')); 128 | )"); 129 | 130 | func.SetTag("ext", "geography"); 131 | func.SetTag("category", "predicates"); 132 | }); 133 | 134 | FunctionBuilder::RegisterScalar( 135 | instance, "s2_equals", [](ScalarFunctionBuilder& func) { 136 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 137 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 138 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 139 | variant.SetReturnType(LogicalType::BOOLEAN); 140 | variant.SetFunction(ExecuteEqualsFn); 141 | }); 142 | 143 | func.SetDescription(R"( 144 | Returns true if the two geographies are equal. 
145 | 146 | Note that this test of equality will pass for *geometrically* equal geographies 147 | that may have the same edges but that are ordered differently. 148 | )"); 149 | func.SetExample(R"( 150 | SELECT s2_equals(s2_data_country('Canada'), s2_data_country('Canada')); 151 | ---- 152 | SELECT s2_equals(s2_data_city('Toronto'), s2_data_country('Canada')); 153 | )"); 154 | 155 | func.SetTag("ext", "geography"); 156 | func.SetTag("category", "predicates"); 157 | }); 158 | 159 | FunctionBuilder::RegisterScalar( 160 | instance, "s2_intersection", [](ScalarFunctionBuilder& func) { 161 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 162 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 163 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 164 | variant.SetReturnType(Types::GEOGRAPHY()); 165 | variant.SetFunction(ExecuteIntersectionFn); 166 | }); 167 | 168 | func.SetDescription(R"( 169 | Returns the intersection of two geographies. 170 | )"); 171 | 172 | func.SetExample(R"( 173 | SELECT s2_intersection( 174 | 'POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))', 175 | 'POLYGON ((5 5, 15 5, 15 15, 5 15, 5 5))' 176 | ) as intersection 177 | )"); 178 | 179 | func.SetTag("ext", "geography"); 180 | func.SetTag("category", "overlay"); 181 | }); 182 | 183 | FunctionBuilder::RegisterScalar( 184 | instance, "s2_difference", [](ScalarFunctionBuilder& func) { 185 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 186 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 187 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 188 | variant.SetReturnType(Types::GEOGRAPHY()); 189 | variant.SetFunction(ExecuteDifferenceFn); 190 | }); 191 | 192 | func.SetDescription(R"( 193 | Returns the difference of two geographies. 194 | )"); 195 | 196 | func.SetExample(R"( 197 | SELECT s2_difference( 198 | 'POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))', 199 | 'POLYGON ((5 5, 15 5, 15 15, 5 15, 5 5))' 200 | ) as difference 201 | )"); 202 | 203 | func.SetTag("ext", "geography"); 204 | func.SetTag("category", "overlay"); 205 | }); 206 | 207 | FunctionBuilder::RegisterScalar( 208 | instance, "s2_union", [](ScalarFunctionBuilder& func) { 209 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 210 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 211 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 212 | variant.SetReturnType(Types::GEOGRAPHY()); 213 | variant.SetFunction(ExecuteUnionFn); 214 | }); 215 | 216 | func.SetDescription(R"( 217 | Returns the union of two geographies. 
218 | )"); 219 | 220 | func.SetExample(R"( 221 | SELECT s2_union( 222 | 'POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))', 223 | 'POLYGON ((5 5, 15 5, 15 15, 5 15, 5 5))' 224 | ) as union_ 225 | )"); 226 | 227 | func.SetTag("ext", "geography"); 228 | func.SetTag("category", "overlay"); 229 | }); 230 | } 231 | 232 | static void ExecuteMayIntersectFn(DataChunk& args, ExpressionState& state, 233 | Vector& result) { 234 | return ExecutePredicateFn( 235 | args, state, result, 236 | [](UniqueGeography lhs, UniqueGeography rhs) { return true; }); 237 | } 238 | 239 | static void ExecuteIntersectsFn(DataChunk& args, ExpressionState& state, 240 | Vector& result) { 241 | S2BooleanOperation::Options options; 242 | InitBooleanOperationOptions(&options); 243 | 244 | return ExecutePredicateFn( 245 | args, state, result, [&options](UniqueGeography lhs, UniqueGeography rhs) { 246 | return DispatchShapeIndexOp( 247 | std::move(lhs), std::move(rhs), 248 | [&options](const S2ShapeIndex& lhs_index, const S2ShapeIndex& rhs_index) { 249 | return S2BooleanOperation::Intersects(lhs_index, rhs_index, options); 250 | }); 251 | }); 252 | } 253 | 254 | static void ExecuteContainsFn(DataChunk& args, ExpressionState& state, Vector& result) { 255 | // Note: Polygon containment when there is a partial shared edge might 256 | // need to be calculated differently. 257 | S2BooleanOperation::Options options; 258 | InitBooleanOperationOptions(&options); 259 | 260 | return ExecutePredicateFn( 261 | args, state, result, [&options](UniqueGeography lhs, UniqueGeography rhs) { 262 | return DispatchShapeIndexOp( 263 | std::move(lhs), std::move(rhs), 264 | [&options](const S2ShapeIndex& lhs_index, const S2ShapeIndex& rhs_index) { 265 | return S2BooleanOperation::Contains(lhs_index, rhs_index, options); 266 | }); 267 | }); 268 | } 269 | 270 | static void ExecuteEqualsFn(DataChunk& args, ExpressionState& state, Vector& result) { 271 | S2BooleanOperation::Options options; 272 | InitBooleanOperationOptions(&options); 273 | 274 | return ExecutePredicateFn( 275 | args, state, result, [&options](UniqueGeography lhs, UniqueGeography rhs) { 276 | return DispatchShapeIndexOp( 277 | std::move(lhs), std::move(rhs), 278 | [&options](const S2ShapeIndex& lhs_index, const S2ShapeIndex& rhs_index) { 279 | return S2BooleanOperation::Equals(lhs_index, rhs_index, options); 280 | }); 281 | }); 282 | } 283 | 284 | template 285 | static void ExecutePredicateFn(DataChunk& args, ExpressionState& state, Vector& result, 286 | Filter&& filter) { 287 | ExecutePredicate(args.data[0], args.data[1], result, args.size(), filter); 288 | } 289 | 290 | template 291 | static void ExecutePredicate(Vector& lhs, Vector& rhs, Vector& result, idx_t count, 292 | Filter&& filter) { 293 | GeographyDecoder lhs_decoder; 294 | GeographyDecoder rhs_decoder; 295 | std::vector intersection; 296 | 297 | BinaryExecutor::Execute( 298 | lhs, rhs, result, count, [&](string_t lhs_str, string_t rhs_str) { 299 | lhs_decoder.DecodeTagAndCovering(lhs_str); 300 | if (lhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 301 | return false; 302 | } 303 | 304 | rhs_decoder.DecodeTagAndCovering(rhs_str); 305 | if (rhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 306 | return false; 307 | } 308 | 309 | if (!CoveringMayIntersect(lhs_decoder, rhs_decoder, &intersection)) { 310 | return false; 311 | } 312 | 313 | return filter(lhs_decoder.Decode(lhs_str), rhs_decoder.Decode(rhs_str)); 314 | }); 315 | } 316 | 317 | static void ExecuteIntersectionFn(DataChunk& args, ExpressionState& state, 
318 | Vector& result) { 319 | ExecuteIntersection(args.data[0], args.data[1], result, args.size()); 320 | } 321 | 322 | static void ExecuteDifferenceFn(DataChunk& args, ExpressionState& state, 323 | Vector& result) { 324 | ExecuteDifference(args.data[0], args.data[1], result, args.size()); 325 | } 326 | 327 | static void ExecuteUnionFn(DataChunk& args, ExpressionState& state, Vector& result) { 328 | ExecuteUnion(args.data[0], args.data[1], result, args.size()); 329 | } 330 | 331 | static void ExecuteIntersection(Vector& lhs, Vector& rhs, Vector& result, idx_t count) { 332 | GeographyDecoder lhs_decoder; 333 | GeographyDecoder rhs_decoder; 334 | GeographyEncoder encoder; 335 | std::vector intersection; 336 | 337 | s2geography::GlobalOptions options; 338 | InitGlobalOptions(&options); 339 | 340 | BinaryExecutor::Execute( 341 | lhs, rhs, result, count, [&](string_t lhs_str, string_t rhs_str) { 342 | lhs_decoder.DecodeTagAndCovering(lhs_str); 343 | 344 | // If the lefthand side is empty, the intersection is the righthand side 345 | if (lhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 346 | return StringVector::AddStringOrBlob(result, rhs_str); 347 | } 348 | 349 | // If the righthand side is empty, the intersection is the lefthand side 350 | rhs_decoder.DecodeTagAndCovering(rhs_str); 351 | if (rhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 352 | return StringVector::AddStringOrBlob(result, lhs_str); 353 | } 354 | 355 | // For definitely disjoint input, the intersection is empty 356 | if (!CoveringMayIntersect(lhs_decoder, rhs_decoder, &intersection)) { 357 | auto geog = make_uniq(); 358 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 359 | } 360 | 361 | auto geog = DispatchShapeIndexOp( 362 | lhs_decoder.Decode(lhs_str), rhs_decoder.Decode(rhs_str), 363 | [&options](const S2ShapeIndex& lhs_index, const S2ShapeIndex& rhs_index) { 364 | return s2geography::s2_boolean_operation( 365 | lhs_index, rhs_index, S2BooleanOperation::OpType::INTERSECTION, 366 | options); 367 | }); 368 | 369 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 370 | }); 371 | } 372 | 373 | static void ExecuteDifference(Vector& lhs, Vector& rhs, Vector& result, idx_t count) { 374 | GeographyDecoder lhs_decoder; 375 | GeographyDecoder rhs_decoder; 376 | GeographyEncoder encoder; 377 | std::vector intersection; 378 | 379 | s2geography::GlobalOptions options; 380 | InitGlobalOptions(&options); 381 | 382 | BinaryExecutor::Execute( 383 | lhs, rhs, result, count, [&](string_t lhs_str, string_t rhs_str) { 384 | lhs_decoder.DecodeTagAndCovering(lhs_str); 385 | 386 | // If the lefthand side is empty, the difference is also empty 387 | if (lhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 388 | auto geog = make_uniq(); 389 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 390 | } 391 | 392 | // If the righthand side is empty, the difference is the lefthand side 393 | rhs_decoder.DecodeTagAndCovering(rhs_str); 394 | if (rhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 395 | return StringVector::AddStringOrBlob(result, lhs_str); 396 | } 397 | 398 | // For definitely disjoint input, the intersection is the lefthand side 399 | if (!CoveringMayIntersect(lhs_decoder, rhs_decoder, &intersection)) { 400 | auto geog = make_uniq(); 401 | return StringVector::AddStringOrBlob(result, lhs_str); 402 | } 403 | 404 | auto geog = DispatchShapeIndexOp( 405 | lhs_decoder.Decode(lhs_str), rhs_decoder.Decode(rhs_str), 406 | [&options](const 
S2ShapeIndex& lhs_index, const S2ShapeIndex& rhs_index) { 407 | return s2geography::s2_boolean_operation( 408 | lhs_index, rhs_index, S2BooleanOperation::OpType::DIFFERENCE, 409 | options); 410 | }); 411 | 412 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 413 | }); 414 | } 415 | 416 | static void ExecuteUnion(Vector& lhs, Vector& rhs, Vector& result, idx_t count) { 417 | GeographyDecoder lhs_decoder; 418 | GeographyDecoder rhs_decoder; 419 | GeographyEncoder encoder; 420 | std::vector intersection; 421 | 422 | s2geography::GlobalOptions options; 423 | InitGlobalOptions(&options); 424 | 425 | BinaryExecutor::Execute( 426 | lhs, rhs, result, count, [&](string_t lhs_str, string_t rhs_str) { 427 | lhs_decoder.DecodeTagAndCovering(lhs_str); 428 | 429 | // If the lefthand side is empty, the union is the righthand side 430 | if (lhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 431 | return StringVector::AddStringOrBlob(result, rhs_str); 432 | } 433 | 434 | // If the righthand side is empty, the union is the lefthand side 435 | rhs_decoder.DecodeTagAndCovering(rhs_str); 436 | if (rhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 437 | return StringVector::AddStringOrBlob(result, lhs_str); 438 | } 439 | 440 | // (No optimization for definitely disjoint binary union) 441 | 442 | auto geog = DispatchShapeIndexOp( 443 | lhs_decoder.Decode(lhs_str), rhs_decoder.Decode(rhs_str), 444 | [&options](const S2ShapeIndex& lhs_index, const S2ShapeIndex& rhs_index) { 445 | return s2geography::s2_boolean_operation( 446 | lhs_index, rhs_index, S2BooleanOperation::OpType::UNION, options); 447 | }); 448 | 449 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 450 | }); 451 | } 452 | 453 | static bool CoveringMayIntersect(const GeographyDecoder& lhs, 454 | const GeographyDecoder& rhs, 455 | std::vector* intersection_scratch) { 456 | // We don't currently omit coverings but in case we do by accident, 457 | // an omitted covering *might* intersect since it was just not generated. 458 | if (lhs.covering.empty() || rhs.covering.empty()) { 459 | return true; 460 | } 461 | 462 | S2CellUnion::GetIntersection(lhs.covering, rhs.covering, intersection_scratch); 463 | return !intersection_scratch->empty(); 464 | } 465 | }; 466 | 467 | struct S2DWithin { 468 | static void Register(DatabaseInstance& instance) { 469 | FunctionBuilder::RegisterScalar( 470 | instance, "s2_dwithin", [](ScalarFunctionBuilder& func) { 471 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 472 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 473 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 474 | variant.AddParameter("distance", LogicalType::DOUBLE); 475 | variant.SetReturnType(LogicalType::BOOLEAN); 476 | variant.SetFunction(ExecuteFn); 477 | }); 478 | 479 | func.SetDescription(R"( 480 | Return true if two geographies are within a given distance (in meters). 
481 | )"); 482 | func.SetExample(R"( 483 | SELECT s2_dwithin( 484 | s2_data_city('Vancouver'), 485 | s2_data_country('United States of America'), 486 | 30000 487 | ) AS is_within; 488 | ---- 489 | SELECT s2_dwithin( 490 | s2_data_city('Vancouver'), 491 | s2_data_country('United States of America'), 492 | 40000 493 | ) AS is_within; 494 | )"); 495 | 496 | func.SetTag("ext", "geography"); 497 | func.SetTag("category", "accessors"); 498 | }); 499 | } 500 | 501 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 502 | Execute(args.data[0], args.data[1], args.data[2], result, args.size()); 503 | } 504 | 505 | static void Execute(Vector& lhs, Vector& rhs, Vector& dist, Vector& result, 506 | idx_t count) { 507 | GeographyDecoder lhs_decoder; 508 | GeographyDecoder rhs_decoder; 509 | 510 | TernaryExecutor::Execute( 511 | lhs, rhs, dist, result, count, 512 | [&](string_t geog1_str, string_t geog2_str, double distance_meters) { 513 | lhs_decoder.DecodeTag(geog1_str); 514 | rhs_decoder.DecodeTag(geog2_str); 515 | 516 | // If either geography is empty, the result is false 517 | if (lhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty || 518 | rhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 519 | return false; 520 | } 521 | 522 | double distance_radians = distance_meters / S2Earth::RadiusMeters(); 523 | 524 | // If we have two snapped cell centers, just calculate the distance directly 525 | if (lhs_decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER && 526 | rhs_decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER) { 527 | S2CellId cell_id1(LittleEndian::Load64(geog1_str.GetData() + 4)); 528 | S2CellId cell_id2(LittleEndian::Load64(geog2_str.GetData() + 4)); 529 | S1ChordAngle distance(cell_id1.ToPoint(), cell_id2.ToPoint()); 530 | return distance.radians() <= distance_radians; 531 | } 532 | 533 | // Otherwise, decode and use the S2ClosestEdgeQuery 534 | auto geog1 = lhs_decoder.Decode(geog1_str); 535 | auto geog2 = rhs_decoder.Decode(geog2_str); 536 | 537 | return DispatchShapeIndexOp( 538 | std::move(geog1), std::move(geog2), 539 | [&](const S2ShapeIndex& lhs, const S2ShapeIndex& rhs) { 540 | S2ClosestEdgeQuery query(&lhs); 541 | S2ClosestEdgeQuery::ShapeIndexTarget target(&rhs); 542 | return query.IsDistanceLessOrEqual( 543 | &target, S1ChordAngle::Radians(distance_radians)); 544 | }); 545 | }); 546 | } 547 | }; 548 | 549 | struct S2Distance { 550 | static void Register(DatabaseInstance& instance) { 551 | FunctionBuilder::RegisterScalar( 552 | instance, "s2_distance", [](ScalarFunctionBuilder& func) { 553 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 554 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 555 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 556 | variant.SetReturnType(LogicalType::DOUBLE); 557 | variant.SetFunction(ExecuteDistanceFn); 558 | }); 559 | 560 | func.SetDescription(R"( 561 | Calculate the shortest distance between two geographies. 
562 | )"); 563 | func.SetExample(R"( 564 | SELECT s2_distance( 565 | s2_data_city('Vancouver'), 566 | s2_data_country('United States of America') 567 | ) AS distance; 568 | )"); 569 | 570 | func.SetTag("ext", "geography"); 571 | func.SetTag("category", "accessors"); 572 | }); 573 | 574 | FunctionBuilder::RegisterScalar( 575 | instance, "s2_max_distance", [](ScalarFunctionBuilder& func) { 576 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 577 | variant.AddParameter("geog1", Types::GEOGRAPHY()); 578 | variant.AddParameter("geog2", Types::GEOGRAPHY()); 579 | variant.SetReturnType(LogicalType::DOUBLE); 580 | variant.SetFunction(ExecuteMaxDistanceFn); 581 | }); 582 | 583 | func.SetDescription(R"( 584 | Calculate the farthest distance between two geographies. 585 | )"); 586 | func.SetExample(R"( 587 | SELECT s2_max_distance( 588 | s2_data_city('Vancouver'), 589 | s2_data_country('United States of America') 590 | ) AS distance; 591 | )"); 592 | 593 | func.SetTag("ext", "geography"); 594 | func.SetTag("category", "accessors"); 595 | }); 596 | } 597 | 598 | static inline void ExecuteDistanceFn(DataChunk& args, ExpressionState& state, 599 | Vector& result) { 600 | Execute(args.data[0], args.data[1], result, args.size(), 601 | [&](const S2ShapeIndex& lhs, const S2ShapeIndex& rhs) { 602 | S2ClosestEdgeQuery query(&lhs); 603 | S2ClosestEdgeQuery::ShapeIndexTarget target(&rhs); 604 | return query.FindClosestEdge(&target).distance().radians() * 605 | S2Earth::RadiusMeters(); 606 | }); 607 | } 608 | 609 | static inline void ExecuteMaxDistanceFn(DataChunk& args, ExpressionState& state, 610 | Vector& result) { 611 | Execute(args.data[0], args.data[1], result, args.size(), 612 | [&](const S2ShapeIndex& lhs, const S2ShapeIndex& rhs) { 613 | S2FurthestEdgeQuery query(&lhs); 614 | S2FurthestEdgeQuery::ShapeIndexTarget target(&rhs); 615 | return query.FindFurthestEdge(&target).distance().radians() * 616 | S2Earth::RadiusMeters(); 617 | }); 618 | } 619 | 620 | template 621 | static void Execute(Vector& lhs, Vector& rhs, Vector& result, idx_t count, Op&& op) { 622 | GeographyDecoder lhs_decoder; 623 | GeographyDecoder rhs_decoder; 624 | 625 | BinaryExecutor::Execute( 626 | lhs, rhs, result, count, [&](string_t geog1_str, string_t geog2_str) { 627 | lhs_decoder.DecodeTag(geog1_str); 628 | rhs_decoder.DecodeTag(geog2_str); 629 | 630 | // If either geography is empty, the result is Inf 631 | if (lhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty || 632 | rhs_decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 633 | return std::numeric_limits::infinity(); 634 | } 635 | 636 | // If we have two snapped cell centers, just calculate the distance directly 637 | if (lhs_decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER && 638 | rhs_decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER) { 639 | S2CellId cell_id1(LittleEndian::Load64(geog1_str.GetData() + 4)); 640 | S2CellId cell_id2(LittleEndian::Load64(geog2_str.GetData() + 4)); 641 | S1ChordAngle distance(cell_id1.ToPoint(), cell_id2.ToPoint()); 642 | return distance.radians() * S2Earth::RadiusMeters(); 643 | } 644 | 645 | // Otherwise, decode and use s2_distance() 646 | auto geog1 = lhs_decoder.Decode(geog1_str); 647 | auto geog2 = rhs_decoder.Decode(geog2_str); 648 | 649 | return DispatchShapeIndexOp(std::move(geog1), std::move(geog2), op); 650 | }); 651 | } 652 | }; 653 | 654 | } // namespace 655 | 656 | void RegisterS2GeographyPredicates(DatabaseInstance& instance) { 657 | S2BinaryIndexOp::Register(instance); 658 | 
S2Distance::Register(instance); 659 | S2DWithin::Register(instance); 660 | } 661 | 662 | } // namespace duckdb_s2 663 | } // namespace duckdb 664 | -------------------------------------------------------------------------------- /src/s2_bounds.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "duckdb/common/vector_operations/generic_executor.hpp" 4 | #include "duckdb/main/database.hpp" 5 | #include "duckdb/main/extension_util.hpp" 6 | 7 | #include "s2/s2cell_union.h" 8 | #include "s2/s2region_coverer.h" 9 | #include "s2_geography_serde.hpp" 10 | #include "s2_types.hpp" 11 | 12 | #include "function_builder.hpp" 13 | 14 | namespace duckdb { 15 | 16 | namespace duckdb_s2 { 17 | 18 | namespace { 19 | 20 | struct S2Covering { 21 | static void Register(DatabaseInstance& instance) { 22 | FunctionBuilder::RegisterScalar( 23 | instance, "s2_covering", [](ScalarFunctionBuilder& func) { 24 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 25 | variant.AddParameter("geog", Types::GEOGRAPHY()); 26 | variant.SetReturnType(Types::S2_CELL_UNION()); 27 | variant.SetFunction(ExecuteFn); 28 | }); 29 | 30 | func.SetDescription(R"( 31 | Returns the S2 cell covering of the geography. 32 | 33 | A covering is a deterministic S2_CELL_UNION (i.e., list of S2_CELLs) that 34 | completely covers a geography. This is useful as a compact approximation 35 | of a geography that can be used to select possible candidates for intersection. 36 | 37 | Note that an S2_CELL_UNION is a thin wrapper around a LIST of S2_CELL, such 38 | that DuckDB LIST functions can be used to unnest, extract, or otherwise 39 | interact with the result. 40 | 41 | See the [Cell Operators](#cellops) section for ways to interact with cells. 42 | )"); 43 | func.SetExample(R"( 44 | SELECT s2_covering(s2_data_country('Germany')) AS covering; 45 | ---- 46 | -- Find countries that might contain Berlin 47 | SELECT name as country, cell FROM ( 48 | SELECT name, UNNEST(s2_covering(geog)) as cell 49 | FROM s2_data_countries() 50 | ) WHERE 51 | s2_cell_contains(cell, s2_data_city('Berlin')::S2_CELL_CENTER::S2_CELL); 52 | )"); 53 | 54 | func.SetTag("ext", "geography"); 55 | func.SetTag("category", "bounds"); 56 | }); 57 | 58 | FunctionBuilder::RegisterScalar( 59 | instance, "s2_covering_fixed_level", [](ScalarFunctionBuilder& func) { 60 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 61 | variant.AddParameter("geog", Types::GEOGRAPHY()); 62 | variant.AddParameter("fixed_level", LogicalType::INTEGER); 63 | variant.SetReturnType(Types::S2_CELL_UNION()); 64 | variant.SetFunction(ExecuteFnFixedLevel); 65 | }); 66 | 67 | func.SetDescription( 68 | R"( 69 | Returns the S2 cell covering of the geography with a fixed level. 70 | 71 | See `[s2_covering](#s2_covering)` for further detail and examples. 
72 | )"); 73 | func.SetExample(R"( 74 | SELECT s2_covering_fixed_level(s2_data_country('Germany'), 3) AS covering; 75 | ---- 76 | SELECT s2_covering_fixed_level(s2_data_country('Germany'), 4) AS covering; 77 | )"); 78 | 79 | func.SetTag("ext", "geography"); 80 | func.SetTag("category", "bounds"); 81 | }); 82 | } 83 | 84 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 85 | S2RegionCoverer coverer; 86 | Execute(args.data[0], result, args.size(), coverer); 87 | } 88 | 89 | static inline void ExecuteFnFixedLevel(DataChunk& args, ExpressionState& state, 90 | Vector& result) { 91 | Vector& max_cells_param = args.data[1]; 92 | if (max_cells_param.GetVectorType() != VectorType::CONSTANT_VECTOR) { 93 | throw InvalidInputException("s2_covering_fixed_level(): level must be a constant"); 94 | } 95 | 96 | int fixed_level = max_cells_param.GetValue(0).GetValue(); 97 | if (fixed_level < 0 || fixed_level > S2CellId::kMaxLevel) { 98 | throw InvalidInputException( 99 | "s2_covering_fixed_level(): level must be between 0 and 30"); 100 | } 101 | 102 | S2RegionCoverer coverer; 103 | coverer.mutable_options()->set_fixed_level(fixed_level); 104 | Execute(args.data[0], result, args.size(), coverer); 105 | } 106 | 107 | static void Execute(Vector& source, Vector& result, idx_t count, 108 | S2RegionCoverer& coverer) { 109 | ListVector::Reserve(result, count * coverer.options().max_cells()); 110 | uint64_t offset = 0; 111 | 112 | GeographyDecoder decoder; 113 | 114 | UnaryExecutor::Execute( 115 | source, result, count, [&](string_t geog_str) { 116 | decoder.DecodeTag(geog_str); 117 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 118 | return list_entry_t{0, 0}; 119 | } 120 | 121 | switch (decoder.tag.kind) { 122 | case s2geography::GeographyKind::CELL_CENTER: { 123 | decoder.DecodeTagAndCovering(geog_str); 124 | S2CellId cell_id = 125 | decoder.covering[0].parent(coverer.options().max_level()); 126 | ListVector::PushBack(result, Value::UBIGINT(cell_id.id())); 127 | list_entry_t out{offset, 1}; 128 | offset += 1; 129 | return out; 130 | } 131 | 132 | default: { 133 | auto geog = decoder.Decode(geog_str); 134 | S2CellUnion covering = coverer.GetCovering(*geog->Region()); 135 | for (const auto cell_id : covering) { 136 | ListVector::PushBack(result, Value::UBIGINT(cell_id.id())); 137 | } 138 | 139 | list_entry_t out{offset, covering.size()}; 140 | offset += out.length; 141 | return out; 142 | } 143 | } 144 | }); 145 | } 146 | }; 147 | 148 | struct S2BoundsRect { 149 | static void Register(DatabaseInstance& instance) { 150 | FunctionBuilder::RegisterScalar( 151 | instance, "s2_bounds_box", [](ScalarFunctionBuilder& func) { 152 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 153 | variant.AddParameter("geog", Types::GEOGRAPHY()); 154 | variant.SetReturnType(Types::S2_BOX()); 155 | variant.SetFunction(ExecuteFn); 156 | }); 157 | 158 | func.SetDescription( 159 | R"( 160 | Returns the bounds of the input geography as a box with Cartesian edges. 161 | 162 | The output xmin may be greater than xmax if the geography crosses the 163 | antimeridian. 
164 | )"); 165 | func.SetExample(R"( 166 | SELECT s2_bounds_box(s2_data_country('Germany')) as rect; 167 | ---- 168 | SELECT s2_bounds_box(s2_data_country('Fiji')) as rect; 169 | )"); 170 | 171 | func.SetTag("ext", "geography"); 172 | func.SetTag("category", "bounds"); 173 | }); 174 | } 175 | 176 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 177 | auto count = args.size(); 178 | auto& input = args.data[0]; 179 | 180 | using BOX_TYPE = StructTypeQuaternary; 181 | using GEOGRAPHY_TYPE = PrimitiveType; 182 | 183 | GeographyDecoder decoder; 184 | 185 | GenericExecutor::ExecuteUnary( 186 | input, result, count, [&](GEOGRAPHY_TYPE& blob) { 187 | decoder.DecodeTag(blob.val); 188 | S2LatLngRect out; 189 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 190 | out = S2LatLngRect::Empty(); 191 | } else if (decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER) { 192 | uint64_t cell_id = LittleEndian::Load64(blob.val.GetData() + 4); 193 | S2CellId cell(cell_id); 194 | out = S2LatLngRect::FromPoint(cell.ToLatLng()); 195 | } else { 196 | auto geog = decoder.Decode(blob.val); 197 | out = geog->Region()->GetRectBound(); 198 | } 199 | return BOX_TYPE{out.lng_lo().degrees(), out.lat_lo().degrees(), 200 | out.lng_hi().degrees(), out.lat_hi().degrees()}; 201 | }); 202 | } 203 | }; 204 | 205 | // Needs to be trivially everythingable, so we can't just use S2LatLngRect 206 | struct BoundsAggState { 207 | R1Interval lat; 208 | S1Interval lng; 209 | 210 | void Init() { 211 | auto rect = S2LatLngRect::Empty(); 212 | lat = rect.lat(); 213 | lng = rect.lng(); 214 | } 215 | 216 | void Union(const S2LatLngRect& other) { 217 | auto rect = S2LatLngRect(lat, lng).Union(other); 218 | lat = rect.lat(); 219 | lng = rect.lng(); 220 | } 221 | 222 | void Union(const BoundsAggState& other) { Union(S2LatLngRect(other.lat, other.lng)); } 223 | }; 224 | 225 | struct S2BoundsRectAgg { 226 | template 227 | static void Initialize(STATE& state) { 228 | state.Init(); 229 | } 230 | 231 | template 232 | static void Combine(const STATE& source, STATE& target, AggregateInputData&) { 233 | target.Union(source); 234 | } 235 | 236 | template 237 | static void Operation(STATE& state, const INPUT_TYPE& input, AggregateUnaryInput&) { 238 | GeographyDecoder decoder; 239 | decoder.DecodeTag(input); 240 | if (decoder.tag.flags & s2geography::EncodeTag::kFlagEmpty) { 241 | return; 242 | } 243 | 244 | if (decoder.tag.kind == s2geography::GeographyKind::CELL_CENTER) { 245 | uint64_t cell_id = LittleEndian::Load64(input.GetData() + 4); 246 | S2CellId cell(cell_id); 247 | S2LatLng pt = cell.ToLatLng(); 248 | S2LatLngRect rect(pt, pt); 249 | state.Union(rect); 250 | } else { 251 | auto geog = decoder.Decode(input); 252 | S2LatLngRect rect = geog->Region()->GetRectBound(); 253 | state.Union(rect); 254 | } 255 | } 256 | 257 | template 258 | static void ConstantOperation(STATE& state, const INPUT_TYPE& input, 259 | AggregateUnaryInput& agg, idx_t) { 260 | Operation(state, input, agg); 261 | } 262 | 263 | template 264 | static void Finalize(STATE& state, T& target, AggregateFinalizeData& finalize_data) { 265 | auto rect = S2LatLngRect(state.lat, state.lng); 266 | 267 | auto& struct_vec = StructVector::GetEntries(finalize_data.result); 268 | auto min_x_data = FlatVector::GetData(*struct_vec[0]); 269 | auto min_y_data = FlatVector::GetData(*struct_vec[1]); 270 | auto max_x_data = FlatVector::GetData(*struct_vec[2]); 271 | auto max_y_data = FlatVector::GetData(*struct_vec[3]); 272 | 273 | idx_t i = 
finalize_data.result_idx; 274 | min_x_data[i] = rect.lng_lo().degrees(); 275 | min_y_data[i] = rect.lat_lo().degrees(); 276 | max_x_data[i] = rect.lng_hi().degrees(); 277 | max_y_data[i] = rect.lat_hi().degrees(); 278 | } 279 | 280 | static bool IgnoreNull() { return true; } 281 | }; 282 | 283 | void RegisterAgg(DatabaseInstance& instance) { 284 | auto function = AggregateFunction::UnaryAggregate(Types::GEOGRAPHY(), 286 | Types::S2_BOX()); 287 | 288 | // Register the function 289 | function.name = "s2_bounds_box_agg"; 290 | ExtensionUtil::RegisterFunction(instance, function); 291 | } 292 | 293 | struct S2BoxLngLatAsWkb { 294 | static void Register(DatabaseInstance& instance) { 295 | FunctionBuilder::RegisterScalar( 296 | instance, "s2_box_wkb", [](ScalarFunctionBuilder& func) { 297 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 298 | variant.AddParameter("box", Types::S2_BOX()); 299 | variant.SetReturnType(LogicalType::BLOB); 300 | variant.SetFunction(ExecuteFn); 301 | }); 302 | 303 | func.SetDescription( 304 | R"( 305 | Serialize a S2_BOX as WKB for export. 306 | )"); 307 | func.SetExample(R"( 308 | SELECT s2_box_wkb(s2_bounds_box('POINT (0 1)'::GEOGRAPHY)) as rect; 309 | )"); 310 | 311 | func.SetTag("ext", "geography"); 312 | func.SetTag("category", "bounds"); 313 | }); 314 | } 315 | 316 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 317 | using BOX_TYPE = StructTypeQuaternary; 318 | using GEOGRAPHY_TYPE = PrimitiveType; 319 | 320 | // We need two WKB outputs: one for a normal box and one for a box that wraps 321 | // over the antimeridian. 322 | Encoder encoder; 323 | encoder.Ensure(92 + 1); 324 | encoder.put8(0x01); 325 | encoder.put32(3); 326 | encoder.put32(1); 327 | encoder.put32(5); 328 | size_t encoder_coord_offset = encoder.length(); 329 | for (int i = 0; i < 10; i++) { 330 | encoder.put64(0); 331 | } 332 | char* coords = const_cast(encoder.base() + encoder_coord_offset); 333 | 334 | Encoder multi_encoder; 335 | multi_encoder.Ensure(93 * 2 + 8 + 1); 336 | multi_encoder.put8(0x01); 337 | multi_encoder.put32(6); 338 | multi_encoder.put32(2); 339 | multi_encoder.put8(0x01); 340 | multi_encoder.put32(3); 341 | multi_encoder.put32(1); 342 | multi_encoder.put32(5); 343 | size_t multi_encoder_coord_offset_east = multi_encoder.length(); 344 | for (int i = 0; i < 10; i++) { 345 | multi_encoder.put64(0); 346 | } 347 | 348 | multi_encoder.put8(0x01); 349 | multi_encoder.put32(3); 350 | multi_encoder.put32(1); 351 | multi_encoder.put32(5); 352 | size_t multi_encoder_coord_offset_west = multi_encoder.length(); 353 | for (int i = 0; i < 10; i++) { 354 | multi_encoder.put64(0); 355 | } 356 | char* multi_coords_east = 357 | const_cast(multi_encoder.base() + multi_encoder_coord_offset_east); 358 | char* multi_coords_west = 359 | const_cast(multi_encoder.base() + multi_encoder_coord_offset_west); 360 | 361 | auto count = args.size(); 362 | auto& source = args.data[0]; 363 | GenericExecutor::ExecuteUnary( 364 | source, result, count, [&](BOX_TYPE& box) { 365 | auto xmin = box.a_val; 366 | auto ymin = box.b_val; 367 | auto xmax = box.c_val; 368 | auto ymax = box.d_val; 369 | if (xmax >= xmin) { 370 | PopulateCoordsFromValues(coords, xmin, ymin, xmax, ymax); 371 | return StringVector::AddStringOrBlob( 372 | result, string_t(encoder.base(), encoder.length())); 373 | } else { 374 | PopulateCoordsFromValues(multi_coords_east, xmin, ymin, 180, ymax); 375 | PopulateCoordsFromValues(multi_coords_west, -180, ymin, xmax, ymax); 376 | return 
StringVector::AddStringOrBlob( 377 | result, string_t(multi_encoder.base(), multi_encoder.length())); 378 | } 379 | }); 380 | } 381 | 382 | static void PopulateCoordsFromValues(char* coords, double xmin, double ymin, 383 | double xmax, double ymax) { 384 | LittleEndian::Store(xmin, coords + 0 * sizeof(double)); 385 | LittleEndian::Store(ymin, coords + 1 * sizeof(double)); 386 | LittleEndian::Store(xmax, coords + 2 * sizeof(double)); 387 | LittleEndian::Store(ymin, coords + 3 * sizeof(double)); 388 | LittleEndian::Store(xmax, coords + 4 * sizeof(double)); 389 | LittleEndian::Store(ymax, coords + 5 * sizeof(double)); 390 | LittleEndian::Store(xmin, coords + 6 * sizeof(double)); 391 | LittleEndian::Store(ymax, coords + 7 * sizeof(double)); 392 | LittleEndian::Store(xmin, coords + 8 * sizeof(double)); 393 | LittleEndian::Store(ymin, coords + 9 * sizeof(double)); 394 | } 395 | }; 396 | 397 | struct S2BoxStruct { 398 | static void Register(DatabaseInstance& instance) { 399 | FunctionBuilder::RegisterScalar( 400 | instance, "s2_box_struct", [](ScalarFunctionBuilder& func) { 401 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 402 | variant.AddParameter("box", Types::S2_BOX()); 403 | variant.SetReturnType(LogicalType::STRUCT({{"xmin", LogicalType::DOUBLE}, 404 | {"ymin", LogicalType::DOUBLE}, 405 | {"xmax", LogicalType::DOUBLE}, 406 | {"ymax", LogicalType::DOUBLE}})); 407 | variant.SetFunction(ExecuteFn); 408 | }); 409 | 410 | func.SetDescription( 411 | R"( 412 | Return a S2_BOX storage as a struct(xmin, ymin, xmax, ymax). 413 | )"); 414 | func.SetExample(R"( 415 | SELECT s2_box_struct(s2_bounds_box('POINT (0 1)'::GEOGRAPHY)) as rect; 416 | )"); 417 | 418 | func.SetTag("ext", "geography"); 419 | func.SetTag("category", "bounds"); 420 | }); 421 | } 422 | 423 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 424 | auto& struct_vec_src = StructVector::GetEntries(args.data[0]); 425 | auto& struct_vec_dst = StructVector::GetEntries(result); 426 | for (int i = 0; i < 4; i++) { 427 | struct_vec_dst[i]->Reference(*struct_vec_src[i]); 428 | } 429 | 430 | if (args.size() == 1) { 431 | result.SetVectorType(VectorType::CONSTANT_VECTOR); 432 | } 433 | } 434 | }; 435 | 436 | struct S2Box { 437 | static void Register(DatabaseInstance& instance) { 438 | FunctionBuilder::RegisterScalar(instance, "s2_box", [](ScalarFunctionBuilder& func) { 439 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 440 | variant.AddParameter("west", LogicalType::DOUBLE); 441 | variant.AddParameter("south", LogicalType::DOUBLE); 442 | variant.AddParameter("east", LogicalType::DOUBLE); 443 | variant.AddParameter("north", LogicalType::DOUBLE); 444 | variant.SetReturnType(Types::S2_BOX()); 445 | variant.SetFunction(ExecuteFn); 446 | }); 447 | 448 | func.SetDescription( 449 | R"( 450 | Create a S2_BOX from xmin (west), ymin (south), xmax (east), and ymax (north). 451 | 452 | Note that any box where ymin > ymax is considered EMPTY for the purposes of 453 | comparison. 
454 | )"); 455 | func.SetExample(R"( 456 | SELECT s2_box(5.989, 47.302, 15.017, 54.983) as box; 457 | ---- 458 | -- xmin (west) can be greater than xmax (east) (e.g., box for Fiji) 459 | SELECT s2_box(177.285, -18.288, 177.285, -16.0209) as box; 460 | )"); 461 | 462 | func.SetTag("ext", "geography"); 463 | func.SetTag("category", "bounds"); 464 | }); 465 | } 466 | 467 | static void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 468 | auto count = args.size(); 469 | 470 | auto& xmin = args.data[0]; 471 | auto& ymin = args.data[1]; 472 | auto& xmax = args.data[2]; 473 | auto& ymax = args.data[3]; 474 | 475 | xmin.Flatten(count); 476 | ymin.Flatten(count); 477 | xmax.Flatten(count); 478 | ymax.Flatten(count); 479 | 480 | auto& children = StructVector::GetEntries(result); 481 | auto& xmin_child = children[0]; 482 | auto& ymin_child = children[1]; 483 | auto& xmax_child = children[2]; 484 | auto& ymax_child = children[3]; 485 | 486 | xmin_child->Reference(xmin); 487 | ymin_child->Reference(ymin); 488 | xmax_child->Reference(xmax); 489 | ymax_child->Reference(ymax); 490 | 491 | if (count == 1) { 492 | result.SetVectorType(VectorType::CONSTANT_VECTOR); 493 | } 494 | } 495 | }; 496 | 497 | struct S2BoxIntersects { 498 | static void Register(DatabaseInstance& instance) { 499 | FunctionBuilder::RegisterScalar( 500 | instance, "s2_box_intersects", [](ScalarFunctionBuilder& func) { 501 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 502 | variant.AddParameter("box1", Types::S2_BOX()); 503 | variant.AddParameter("box2", Types::S2_BOX()); 504 | variant.SetReturnType(LogicalType::BOOLEAN); 505 | variant.SetFunction(ExecuteFn); 506 | }); 507 | 508 | func.SetDescription( 509 | R"( 510 | Return true if two boxes have any points in common. 511 | )"); 512 | func.SetExample(R"( 513 | SELECT s2_box_intersects( 514 | s2_bounds_box(s2_data_country('Germany')), 515 | s2_bounds_box(s2_data_country('France')) 516 | ); 517 | ---- 518 | SELECT s2_box_intersects( 519 | s2_bounds_box(s2_data_country('Germany')), 520 | s2_bounds_box(s2_data_country('Canada')) 521 | ); 522 | )"); 523 | 524 | func.SetTag("ext", "geography"); 525 | func.SetTag("category", "bounds"); 526 | }); 527 | } 528 | 529 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 530 | using BOX_TYPE = StructTypeQuaternary; 531 | using BOOL_TYPE = PrimitiveType; 532 | Vector& lhs_vec = args.data[0]; 533 | Vector& rhs_vec = args.data[1]; 534 | idx_t count = args.size(); 535 | 536 | GenericExecutor::ExecuteBinary( 537 | lhs_vec, rhs_vec, result, count, [&](BOX_TYPE& lhs, BOX_TYPE& rhs) { 538 | S2LatLngRect lhs_rect(S2LatLng::FromDegrees(lhs.b_val, lhs.a_val), 539 | S2LatLng::FromDegrees(lhs.d_val, lhs.c_val)); 540 | S2LatLngRect rhs_rect(S2LatLng::FromDegrees(rhs.b_val, rhs.a_val), 541 | S2LatLng::FromDegrees(rhs.d_val, rhs.c_val)); 542 | return lhs_rect.Intersects(rhs_rect); 543 | }); 544 | } 545 | }; 546 | 547 | struct S2BoxUnion { 548 | static void Register(DatabaseInstance& instance) { 549 | FunctionBuilder::RegisterScalar( 550 | instance, "s2_box_union", [](ScalarFunctionBuilder& func) { 551 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 552 | variant.AddParameter("box1", Types::S2_BOX()); 553 | variant.AddParameter("box2", Types::S2_BOX()); 554 | variant.SetReturnType(Types::S2_BOX()); 555 | variant.SetFunction(ExecuteFn); 556 | }); 557 | 558 | func.SetDescription( 559 | R"( 560 | Return the smallest possible box that contains both input boxes. 
561 | )"); 562 | func.SetExample(R"( 563 | SELECT s2_box_union( 564 | s2_bounds_box(s2_data_country('Germany')), 565 | s2_bounds_box(s2_data_country('France')) 566 | ); 567 | )"); 568 | 569 | func.SetTag("ext", "geography"); 570 | func.SetTag("category", "bounds"); 571 | }); 572 | } 573 | 574 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 575 | using BOX_TYPE = StructTypeQuaternary; 576 | Vector& lhs_vec = args.data[0]; 577 | Vector& rhs_vec = args.data[1]; 578 | idx_t count = args.size(); 579 | 580 | GenericExecutor::ExecuteBinary( 581 | lhs_vec, rhs_vec, result, count, [&](BOX_TYPE& lhs, BOX_TYPE& rhs) { 582 | S2LatLngRect lhs_rect(S2LatLng::FromDegrees(lhs.b_val, lhs.a_val), 583 | S2LatLng::FromDegrees(lhs.d_val, lhs.c_val)); 584 | S2LatLngRect rhs_rect(S2LatLng::FromDegrees(rhs.b_val, rhs.a_val), 585 | S2LatLng::FromDegrees(rhs.d_val, rhs.c_val)); 586 | S2LatLngRect out = lhs_rect.Union(rhs_rect); 587 | return BOX_TYPE{out.lng_lo().degrees(), out.lat_lo().degrees(), 588 | out.lng_hi().degrees(), out.lat_hi().degrees()}; 589 | }); 590 | } 591 | }; 592 | 593 | } // namespace 594 | 595 | void RegisterS2GeographyBounds(DatabaseInstance& instance) { 596 | S2Covering::Register(instance); 597 | S2BoundsRect::Register(instance); 598 | S2BoxLngLatAsWkb::Register(instance); 599 | S2BoxStruct::Register(instance); 600 | S2Box::Register(instance); 601 | S2BoxIntersects::Register(instance); 602 | S2BoxUnion::Register(instance); 603 | 604 | RegisterAgg(instance); 605 | } 606 | 607 | } // namespace duckdb_s2 608 | } // namespace duckdb 609 | -------------------------------------------------------------------------------- /src/s2_data.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "duckdb/common/string_util.hpp" 3 | #include "duckdb/function/table_function.hpp" 4 | #include "duckdb/main/extension_util.hpp" 5 | 6 | #include 7 | #include 8 | 9 | #include "s2_data_static.hpp" 10 | #include "s2_geography_serde.hpp" 11 | #include "s2_types.hpp" 12 | 13 | #include "function_builder.hpp" 14 | 15 | namespace duckdb { 16 | 17 | namespace duckdb_s2 { 18 | 19 | namespace { 20 | 21 | class S2DataFunctionData : public TableFunctionData { 22 | public: 23 | S2DataFunctionData() {} 24 | idx_t offset{0}; 25 | }; 26 | 27 | static inline duckdb::unique_ptr S2DataCitiesBind( 28 | ClientContext& context, TableFunctionBindInput& input, 29 | vector& return_types, vector& names) { 30 | names.push_back("name"); 31 | names.push_back("population"); 32 | names.push_back("geog"); 33 | return_types.push_back(LogicalType::VARCHAR); 34 | return_types.push_back(LogicalType::INTEGER); 35 | return_types.push_back(Types::GEOGRAPHY()); 36 | return make_uniq(); 37 | } 38 | 39 | void S2DataCitiesScan(ClientContext& context, TableFunctionInput& data_p, 40 | DataChunk& output) { 41 | auto& data = data_p.bind_data->CastNoConst(); 42 | idx_t n_cities = static_cast(kCities.size()); 43 | 44 | if (data.offset >= n_cities) { 45 | return; 46 | } 47 | 48 | idx_t start = data.offset; 49 | idx_t end = start + STANDARD_VECTOR_SIZE; 50 | if (end > n_cities) { 51 | end = n_cities; 52 | } 53 | 54 | s2geography::WKTReader reader; 55 | GeographyEncoder encoder; 56 | Vector& names = output.data[0]; 57 | Vector& populations = output.data[1]; 58 | Vector& geogs = output.data[2]; 59 | 60 | // There seems to be some issue with constructing a Value from 61 | // invalid unicode (i.e., a blob), and it's unclear if SetValue() 62 | // will automatically call 
AddString(). So, we do this manually. 63 | auto geogs_data = reinterpret_cast(geogs.GetData()); 64 | 65 | for (idx_t i = start; i < end; i++) { 66 | const City& city = kCities[i]; 67 | names.SetValue(i - start, StringVector::AddString(names, city.name)); 68 | populations.SetValue(i - start, city.population); 69 | 70 | auto geog = reader.read_feature(city.geog_wkt); 71 | string_t encoded = StringVector::AddStringOrBlob(geogs, encoder.Encode(*geog)); 72 | geogs_data[i] = encoded; 73 | } 74 | 75 | data.offset = end; 76 | output.SetCardinality(end - start); 77 | } 78 | 79 | static inline duckdb::unique_ptr S2DataCountriesBind( 80 | ClientContext& context, TableFunctionBindInput& input, 81 | vector& return_types, vector& names) { 82 | names.push_back("name"); 83 | names.push_back("continent"); 84 | names.push_back("geog"); 85 | return_types.push_back(LogicalType::VARCHAR); 86 | return_types.push_back(LogicalType::VARCHAR); 87 | return_types.push_back(Types::GEOGRAPHY()); 88 | return make_uniq(); 89 | } 90 | 91 | void S2DataCountriesScan(ClientContext& context, TableFunctionInput& data_p, 92 | DataChunk& output) { 93 | auto& data = data_p.bind_data->CastNoConst(); 94 | idx_t n_cities = static_cast(kCountries.size()); 95 | 96 | if (data.offset >= n_cities) { 97 | return; 98 | } 99 | 100 | idx_t start = data.offset; 101 | idx_t end = start + STANDARD_VECTOR_SIZE; 102 | if (end > n_cities) { 103 | end = n_cities; 104 | } 105 | 106 | s2geography::WKTReader reader; 107 | GeographyEncoder encoder; 108 | Vector& names = output.data[0]; 109 | Vector& continents = output.data[1]; 110 | Vector& geogs = output.data[2]; 111 | 112 | // There seems to be some issue with constructing a Value from 113 | // invalid unicode (i.e., a blob), and it's unclear if SetValue() 114 | // will automatically call AddString(). So, we do this manually. 115 | auto geogs_data = reinterpret_cast(geogs.GetData()); 116 | 117 | for (idx_t i = start; i < end; i++) { 118 | const Country& country = kCountries[i]; 119 | names.SetValue(i - start, StringVector::AddString(names, country.name)); 120 | continents.SetValue(i - start, StringVector::AddString(names, country.continent)); 121 | 122 | auto geog = reader.read_feature(country.geog_wkt); 123 | string_t encoded = StringVector::AddStringOrBlob(geogs, encoder.Encode(*geog)); 124 | geogs_data[i] = encoded; 125 | } 126 | 127 | data.offset = end; 128 | output.SetCardinality(end - start); 129 | } 130 | 131 | template 132 | const std::vector& ItemList(); 133 | 134 | template <> 135 | const std::vector& ItemList() { 136 | return kCountries; 137 | } 138 | 139 | template <> 140 | const std::vector& ItemList() { 141 | return kCities; 142 | } 143 | 144 | template 145 | struct S2DataScalar { 146 | static void Register(DatabaseInstance& instance, const char* fn_name) { 147 | FunctionBuilder::RegisterScalar(instance, fn_name, [](ScalarFunctionBuilder& func) { 148 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 149 | variant.AddParameter("name", LogicalType::VARCHAR); 150 | variant.SetReturnType(Types::GEOGRAPHY()); 151 | variant.SetFunction(ExecuteFn); 152 | }); 153 | 154 | func.SetDescription(R"( 155 | Get an example city or country from [`s2_data_cities()`](#s2_data_cities) 156 | or [`s2_data_countries()`](#s2_data_countries) by name. 
157 | )"); 158 | func.SetExample(R"( 159 | SELECT s2_data_city('Toronto') as city; 160 | ---- 161 | SELECT s2_data_country('Fiji') as country; 162 | )"); 163 | 164 | func.SetTag("ext", "geography"); 165 | func.SetTag("category", "data"); 166 | }); 167 | } 168 | 169 | static void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 170 | s2geography::WKTReader reader; 171 | GeographyEncoder encoder; 172 | 173 | std::unordered_map cache; 174 | for (const T& item : ItemList()) { 175 | cache.insert({item.name, item.geog_wkt}); 176 | } 177 | 178 | UnaryExecutor::Execute( 179 | args.data[0], result, args.size(), [&](string_t name) { 180 | std::string name_str(name.GetData(), name.GetSize()); 181 | auto item = cache.find(name_str); 182 | if (item == cache.end()) { 183 | throw InvalidInputException(std::string("No entry for item '") + name_str + 184 | "'"); 185 | } 186 | 187 | auto geog = reader.read_feature(item->second); 188 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 189 | }); 190 | } 191 | }; 192 | 193 | } // namespace 194 | 195 | void RegisterS2Data(DatabaseInstance& instance) { 196 | TableFunction cities_func("s2_data_cities", {}, S2DataCitiesScan, S2DataCitiesBind); 197 | ExtensionUtil::RegisterFunction(instance, cities_func); 198 | 199 | TableFunction countries_func("s2_data_countries", {}, S2DataCountriesScan, 200 | S2DataCountriesBind); 201 | ExtensionUtil::RegisterFunction(instance, countries_func); 202 | 203 | S2DataScalar::Register(instance, "s2_data_city"); 204 | S2DataScalar::Register(instance, "s2_data_country"); 205 | } 206 | 207 | } // namespace duckdb_s2 208 | } // namespace duckdb 209 | -------------------------------------------------------------------------------- /src/s2_dependencies.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb/function/table_function.hpp" 2 | #include "duckdb/main/extension_util.hpp" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "geoarrow/geoarrow.h" 9 | #include "nanoarrow/nanoarrow.h" 10 | 11 | namespace duckdb { 12 | 13 | namespace duckdb_s2 { 14 | 15 | namespace { 16 | class S2DependenciesFunctionData : public TableFunctionData { 17 | public: 18 | S2DependenciesFunctionData() : finished(false) {} 19 | bool finished{false}; 20 | }; 21 | 22 | static inline duckdb::unique_ptr S2DependenciesBind( 23 | ClientContext& context, TableFunctionBindInput& input, 24 | vector& return_types, vector& names) { 25 | names.push_back("dependency"); 26 | names.push_back("version"); 27 | return_types.push_back(LogicalType::VARCHAR); 28 | return_types.push_back(LogicalType::VARCHAR); 29 | return make_uniq(); 30 | } 31 | 32 | void S2DependenciesScan(ClientContext& context, TableFunctionInput& data_p, 33 | DataChunk& output) { 34 | auto& data = data_p.bind_data->CastNoConst(); 35 | if (data.finished) { 36 | return; 37 | } 38 | 39 | output.SetValue(0, 0, "openssl"); 40 | output.SetValue(1, 0, 41 | std::string() + std::to_string(OPENSSL_VERSION_MAJOR) + "." + 42 | std::to_string(OPENSSL_VERSION_MINOR) + "." + 43 | std::to_string(OPENSSL_VERSION_PATCH)); 44 | output.SetValue(0, 1, "abseil-cpp"); 45 | output.SetValue(1, 1, 46 | std::string() + std::to_string(ABSL_LTS_RELEASE_VERSION) + "." + 47 | std::to_string(ABSL_LTS_RELEASE_PATCH_LEVEL)); 48 | output.SetValue(0, 2, "s2geometry"); 49 | output.SetValue(1, 2, 50 | std::string() + std::to_string(S2_VERSION_MAJOR) + "." + 51 | std::to_string(S2_VERSION_MINOR) + "." 
+ 52 | std::to_string(S2_VERSION_PATCH)); 53 | 54 | output.SetValue(0, 3, "nanoarrow"); 55 | output.SetValue(1, 3, std::string() + ArrowNanoarrowVersion()); 56 | 57 | output.SetValue(0, 4, "geoarrow"); 58 | output.SetValue(1, 4, std::string() + GeoArrowVersion()); 59 | 60 | output.SetCardinality(5); 61 | data.finished = true; 62 | } 63 | 64 | } // namespace 65 | 66 | void RegisterS2Dependencies(DatabaseInstance& instance) { 67 | TableFunction versions_func("s2_dependencies", {}, S2DependenciesScan, 68 | S2DependenciesBind); 69 | ExtensionUtil::RegisterFunction(instance, versions_func); 70 | } 71 | 72 | } // namespace duckdb_s2 73 | } // namespace duckdb 74 | -------------------------------------------------------------------------------- /src/s2_functions_io.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "duckdb/main/database.hpp" 3 | #include "duckdb/main/extension_util.hpp" 4 | 5 | #include "s2/encoded_s2shape_index.h" 6 | #include "s2/s2shape_index_region.h" 7 | #include "s2/s2shapeutil_coding.h" 8 | #include "s2geography/geography.h" 9 | 10 | #include "s2_geography_serde.hpp" 11 | #include "s2_types.hpp" 12 | #include "s2geography/geoarrow.h" 13 | #include "s2geography/wkb.h" 14 | #include "s2geography/wkt-reader.h" 15 | #include "s2geography/wkt-writer.h" 16 | 17 | #include "function_builder.hpp" 18 | 19 | namespace duckdb { 20 | 21 | namespace duckdb_s2 { 22 | 23 | struct S2GeogFromText { 24 | static void Register(DatabaseInstance& instance) { 25 | FunctionBuilder::RegisterScalar( 26 | instance, "s2_geogfromtext", [](ScalarFunctionBuilder& func) { 27 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 28 | variant.AddParameter("wkt", LogicalType::VARCHAR); 29 | variant.SetReturnType(Types::GEOGRAPHY()); 30 | variant.SetFunction(ExecuteFn); 31 | }); 32 | 33 | func.SetDescription(R"( 34 | Returns the geography from a WKT string. 35 | 36 | This is an alias for the cast from VARCHAR to GEOGRAPHY. This 37 | function assumes spherical edges. 38 | )"); 39 | func.SetExample(R"( 40 | SELECT s2_geogfromtext('POINT (0 1)'); 41 | ---- 42 | SELECT 'POINT (0 1)'::GEOGRAPHY; 43 | )"); 44 | 45 | func.SetTag("ext", "geography"); 46 | func.SetTag("category", "conversion"); 47 | }); 48 | 49 | FunctionBuilder::RegisterScalar( 50 | instance, "s2_geogfromtext_novalidate", [](ScalarFunctionBuilder& func) { 51 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 52 | variant.AddParameter("wkt", LogicalType::VARCHAR); 53 | variant.SetReturnType(Types::GEOGRAPHY()); 54 | variant.SetFunction(ExecuteFnNovalidate); 55 | }); 56 | 57 | func.SetDescription(R"( 58 | Returns the geography from a WKT string skipping validation. 59 | 60 | This is useful to determine which of some set of geometries is not valid and 61 | why. 
62 | )"); 63 | func.SetExample(R"( 64 | SELECT s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)'); 65 | )"); 66 | 67 | func.SetTag("ext", "geography"); 68 | func.SetTag("category", "conversion"); 69 | }); 70 | 71 | ExtensionUtil::RegisterCastFunction(instance, LogicalType::VARCHAR, 72 | Types::GEOGRAPHY(), BoundCastInfo(ExecuteCast), 73 | 1); 74 | } 75 | 76 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 77 | Execute(args.data[0], result, args.size()); 78 | } 79 | 80 | static inline void ExecuteFnNovalidate(DataChunk& args, ExpressionState& state, 81 | Vector& result) { 82 | s2geography::geoarrow::ImportOptions options; 83 | options.set_check(false); 84 | Execute(args.data[0], result, args.size(), options); 85 | } 86 | 87 | static inline bool ExecuteCast(Vector& source, Vector& result, idx_t count, 88 | CastParameters& parameters) { 89 | Execute(source, result, count); 90 | return true; 91 | } 92 | 93 | static inline void Execute(Vector& source, Vector& result, idx_t count, 94 | const s2geography::geoarrow::ImportOptions& options = 95 | s2geography::geoarrow::ImportOptions()) { 96 | GeographyEncoder encoder; 97 | s2geography::WKTReader reader(options); 98 | 99 | UnaryExecutor::Execute(source, result, count, [&](string_t wkt) { 100 | auto geog = reader.read_feature(wkt.GetData(), wkt.GetSize()); 101 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 102 | }); 103 | } 104 | }; 105 | 106 | struct S2AsText { 107 | static void Register(DatabaseInstance& instance) { 108 | FunctionBuilder::RegisterScalar( 109 | instance, "s2_astext", [](ScalarFunctionBuilder& func) { 110 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 111 | variant.AddParameter("geog", Types::GEOGRAPHY()); 112 | variant.SetReturnType(LogicalType::VARCHAR); 113 | variant.SetFunction(ExecuteFn); 114 | }); 115 | 116 | func.SetDescription(R"( 117 | Returns the well-known text (WKT) string of the geography. 118 | 119 | Note that because the internal representation of the GEOGRAPHY type is either 120 | an S2_CELL_CENTER or a unit vector, WKT typically does not roundtrip through a 121 | GEOGRAPHY unless the output is rounded using `[s2_format()`][#s2_format]. 122 | 123 | The output contains spherical edges. If edges are large and the consumer does 124 | not know that the edges are spherical, this may cause issues. 125 | 126 | Calling this function has the same effect as casting to VARCHAR. 127 | )"); 128 | func.SetExample(R"( 129 | SELECT s2_astext(s2_data_city('Vancouver')); 130 | )"); 131 | 132 | func.SetTag("ext", "geography"); 133 | func.SetTag("category", "conversion"); 134 | }); 135 | 136 | FunctionBuilder::RegisterScalar( 137 | instance, "s2_format", [](ScalarFunctionBuilder& func) { 138 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 139 | variant.AddParameter("geog", Types::GEOGRAPHY()); 140 | variant.AddParameter("precision", LogicalType::TINYINT); 141 | variant.SetReturnType(LogicalType::VARCHAR); 142 | variant.SetFunction(ExecuteFnPrec); 143 | }); 144 | 145 | func.SetDescription( 146 | R"( 147 | Returns the WKT string of the geography with a given precision. 148 | 149 | See [`s2_astext()`](#s2_text) for parameter-free lossless output. Like `s2_text()`, 150 | this function exports spherical edges. 
151 | )"); 152 | func.SetExample(R"( 153 | SELECT s2_format(s2_data_city('Vancouver'), 1); 154 | )"); 155 | 156 | func.SetTag("ext", "geography"); 157 | func.SetTag("category", "conversion"); 158 | }); 159 | 160 | ExtensionUtil::RegisterCastFunction(instance, Types::GEOGRAPHY(), 161 | LogicalType::VARCHAR, BoundCastInfo(ExecuteCast), 162 | 1); 163 | } 164 | 165 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 166 | Execute(args.data[0], result, args.size()); 167 | } 168 | 169 | static inline void ExecuteFnPrec(DataChunk& args, ExpressionState& state, 170 | Vector& result) { 171 | Vector& precision = args.data[1]; 172 | if (precision.GetVectorType() != VectorType::CONSTANT_VECTOR) { 173 | throw InvalidInputException("Can't use s2_format() with non-constant precision"); 174 | } 175 | 176 | Execute(args.data[0], result, args.size(), precision.GetValue(0).GetValue()); 177 | } 178 | 179 | static inline bool ExecuteCast(Vector& source, Vector& result, idx_t count, 180 | CastParameters& parameters) { 181 | Execute(source, result, count); 182 | return true; 183 | } 184 | 185 | static inline void Execute(Vector& source, Vector& result, idx_t count, 186 | int8_t precision = -1) { 187 | GeographyDecoder decoder; 188 | s2geography::WKTWriter writer(precision); 189 | 190 | UnaryExecutor::Execute( 191 | source, result, count, [&](string_t geog_str) { 192 | decoder.DecodeTag(geog_str); 193 | if (decoder.tag.kind == s2geography::GeographyKind::SHAPE_INDEX) { 194 | return StringVector::AddString( 195 | result, std::string(""); 197 | } 198 | auto geog = decoder.Decode(geog_str); 199 | std::string wkt = writer.write_feature(*geog); 200 | return StringVector::AddString(result, wkt); 201 | }); 202 | } 203 | }; 204 | 205 | struct S2GeogFromWKB { 206 | static void Register(DatabaseInstance& instance) { 207 | FunctionBuilder::RegisterScalar( 208 | instance, "s2_geogfromwkb", [](ScalarFunctionBuilder& func) { 209 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 210 | variant.AddParameter("wkb", LogicalType::BLOB); 211 | variant.SetReturnType(Types::GEOGRAPHY()); 212 | variant.SetFunction(ExecuteFn); 213 | }); 214 | 215 | func.SetDescription(R"( 216 | Converts a WKB blob to a geography. 217 | 218 | The input WKB blog is assumed to have longitude/latitude coordinates and have 219 | spherical edges. If edges are long and the input had a different edge type, 220 | the resulting GEOGRAPHY may be invalid or represent a different location than 221 | intended. 222 | )"); 223 | func.SetExample(R"( 224 | SELECT s2_geogfromwkb(s2_aswkb(s2_data_city('Toronto'))) as geog; 225 | )"); 226 | 227 | func.SetTag("ext", "geography"); 228 | func.SetTag("category", "conversion"); 229 | }); 230 | 231 | FunctionBuilder::RegisterScalar( 232 | instance, "s2_geogfromwkb_novalidate", [](ScalarFunctionBuilder& func) { 233 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 234 | variant.AddParameter("wkb", LogicalType::BLOB); 235 | variant.SetReturnType(Types::GEOGRAPHY()); 236 | variant.SetFunction(ExecuteFnNovalidate); 237 | }); 238 | 239 | func.SetDescription(R"( 240 | Returns the geography from a WKB blob skipping validation. 241 | 242 | This is useful to determine which of some set of geometries is not valid and 243 | why (or to help make them valid). 
244 | )"); 245 | func.SetExample(R"( 246 | SELECT s2_geogfromwkb_novalidate( 247 | s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)').s2_aswkb() 248 | ); 249 | )"); 250 | 251 | func.SetTag("ext", "geography"); 252 | func.SetTag("category", "conversion"); 253 | }); 254 | } 255 | 256 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 257 | Execute(args.data[0], result, args.size()); 258 | } 259 | 260 | static inline void ExecuteFnNovalidate(DataChunk& args, ExpressionState& state, 261 | Vector& result) { 262 | s2geography::geoarrow::ImportOptions options; 263 | options.set_check(false); 264 | Execute(args.data[0], result, args.size(), options); 265 | } 266 | 267 | static inline void Execute(Vector& source, Vector& result, idx_t count, 268 | const s2geography::geoarrow::ImportOptions& options = 269 | s2geography::geoarrow::ImportOptions()) { 270 | s2geography::WKBReader reader(options); 271 | GeographyEncoder encoder; 272 | 273 | UnaryExecutor::Execute(source, result, count, [&](string_t wkb) { 274 | std::unique_ptr geog = 275 | reader.ReadFeature(std::string_view(wkb.GetData(), wkb.GetSize())); 276 | return StringVector::AddStringOrBlob(result, encoder.Encode(*geog)); 277 | }); 278 | } 279 | }; 280 | 281 | struct S2AsWKB { 282 | static void Register(DatabaseInstance& instance) { 283 | FunctionBuilder::RegisterScalar( 284 | instance, "s2_aswkb", [](ScalarFunctionBuilder& func) { 285 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 286 | variant.AddParameter("geog", Types::GEOGRAPHY()); 287 | variant.SetReturnType(LogicalType::BLOB); 288 | variant.SetFunction(ExecuteFn); 289 | }); 290 | 291 | func.SetDescription(R"( 292 | Serialize a GEOGRAPHY as well-known binary (WKB). 293 | 294 | Note that because the internal representation of the GEOGRAPHY type is either 295 | an S2_CELL_CENTER or a unit vector, WKB typically does not roundtrip through a 296 | GEOGRAPHY. 297 | 298 | The output contains spherical edges. If edges are large and the consumer does 299 | not know that the edges are spherical, this may cause issues. 300 | )"); 301 | func.SetExample(R"( 302 | SELECT s2_aswkb(s2_data_city('Toronto')) as wkb; 303 | )"); 304 | 305 | func.SetTag("ext", "geography"); 306 | func.SetTag("category", "conversion"); 307 | }); 308 | } 309 | 310 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 311 | Execute(args.data[0], result, args.size()); 312 | } 313 | 314 | static inline void Execute(Vector& source, Vector& result, idx_t count, 315 | const s2geography::geoarrow::ExportOptions& options = 316 | s2geography::geoarrow::ExportOptions()) { 317 | GeographyDecoder decoder; 318 | s2geography::WKBWriter writer; 319 | 320 | UnaryExecutor::Execute(source, result, count, [&](string_t wkb) { 321 | std::unique_ptr geog = decoder.Decode(wkb); 322 | return StringVector::AddStringOrBlob(result, writer.WriteFeature(*geog)); 323 | }); 324 | } 325 | }; 326 | 327 | struct S2GeogPrepare { 328 | static void Register(DatabaseInstance& instance) { 329 | FunctionBuilder::RegisterScalar( 330 | instance, "s2_prepare", [](ScalarFunctionBuilder& func) { 331 | func.AddVariant([](ScalarFunctionVariantBuilder& variant) { 332 | variant.AddParameter("geog", Types::GEOGRAPHY()); 333 | variant.SetReturnType(Types::GEOGRAPHY()); 334 | variant.SetFunction(ExecuteFn); 335 | }); 336 | 337 | func.SetDescription( 338 | R"( 339 | Prepares a geography for faster predicate and overlay operations. 
340 | 341 | For advanced users, this is useful for preparing input that will be subject 342 | to a large number of intersection or containment checks. In high-level terms, 343 | this operation builds a cell-based index on the edges of the geography that 344 | would otherwise have to occur on every intersection check. 345 | 346 | This function returns its input for very small geographies (e.g., points) 347 | that do not benefit from this operation. 348 | )"); 349 | func.SetExample(R"( 350 | SELECT s2_prepare(s2_data_country('Fiji')); 351 | ---- 352 | CREATE TABLE countries AS 353 | SELECT name, s2_prepare(geog) as geog 354 | FROM s2_data_countries(); 355 | 356 | SELECT cities.name as city, countries.name as country 357 | FROM s2_data_cities() AS cities 358 | INNER JOIN countries ON s2_contains(countries.geog, cities.geog) 359 | LIMIT 5; 360 | )"); 361 | 362 | func.SetTag("ext", "geography"); 363 | func.SetTag("category", "conversion"); 364 | }); 365 | } 366 | 367 | static inline void ExecuteFn(DataChunk& args, ExpressionState& state, Vector& result) { 368 | Execute(args.data[0], result, args.size()); 369 | } 370 | 371 | static inline void Execute(Vector& source, Vector& result, idx_t count) { 372 | GeographyDecoder decoder; 373 | GeographyEncoder encoder; 374 | 375 | UnaryExecutor::Execute( 376 | source, result, count, [&](string_t geog_str) { 377 | decoder.DecodeTag(geog_str); 378 | 379 | // For small geographies or something that is already prepared, don't 380 | // trigger a new index. 64 bytes is arbitrary here (should be tuned). 381 | if (decoder.tag.kind == s2geography::GeographyKind::SHAPE_INDEX || 382 | geog_str.GetSize() < 64) { 383 | // Maybe a way to avoid copying geog_str? 384 | return StringVector::AddStringOrBlob(result, geog_str); 385 | } 386 | 387 | std::unique_ptr geog = decoder.Decode(geog_str); 388 | s2geography::ShapeIndexGeography index_geog(*geog); 389 | return StringVector::AddStringOrBlob(result, encoder.Encode(index_geog)); 390 | }); 391 | } 392 | }; 393 | 394 | void ImportWKBToGeography(Vector& source, Vector& result, idx_t count, 395 | const s2geography::geoarrow::ImportOptions& options) { 396 | S2GeogFromWKB::Execute(source, result, count, options); 397 | } 398 | 399 | void ExportGeographyToWKB(Vector& source, Vector& result, idx_t count, 400 | const s2geography::geoarrow::ExportOptions& options) { 401 | S2AsWKB::Execute(source, result, count, options); 402 | } 403 | 404 | void RegisterS2GeographyFunctionsIO(DatabaseInstance& instance) { 405 | S2GeogFromText::Register(instance); 406 | S2GeogFromWKB::Register(instance); 407 | S2AsText::Register(instance); 408 | S2AsWKB::Register(instance); 409 | S2GeogPrepare::Register(instance); 410 | } 411 | 412 | } // namespace duckdb_s2 413 | } // namespace duckdb 414 | -------------------------------------------------------------------------------- /src/s2_geoarrow.cpp: -------------------------------------------------------------------------------- 1 | #include "duckdb/common/arrow/arrow_converter.hpp" 2 | #include "duckdb/common/arrow/schema_metadata.hpp" 3 | #include "duckdb/function/table/arrow/arrow_duck_schema.hpp" 4 | #include "duckdb/function/table_function.hpp" 5 | #include "duckdb/main/database.hpp" 6 | #include "duckdb/main/extension_util.hpp" 7 | 8 | #include "geoarrow/geoarrow.hpp" 9 | 10 | #include "s2_functions_io.hpp" 11 | #include "s2_types.hpp" 12 | 13 | namespace duckdb { 14 | 15 | namespace duckdb_s2 { 16 | 17 | namespace { 18 | 19 | struct GeoArrowWKB { 20 | static unique_ptr GetType(const ArrowSchema&
schema, 21 | const ArrowSchemaMetadata& schema_metadata) { 22 | // Validate extension metadata. This metadata also contains a CRS, which we drop 23 | // because the GEOGRAPHY type does not implement a CRS at the type level. 24 | string extension_metadata = 25 | schema_metadata.GetOption(ArrowSchemaMetadata::ARROW_METADATA_KEY); 26 | auto data_type = 27 | geoarrow::GeometryDataType::Make(GEOARROW_TYPE_WKB, extension_metadata); 28 | if (data_type.edge_type() != GEOARROW_EDGE_TYPE_SPHERICAL) { 29 | throw NotImplementedException("Can't import non-spherical edges as GEOGRAPHY"); 30 | } 31 | 32 | const auto format = string(schema.format); 33 | if (format == "z") { 34 | return make_uniq( 35 | Types::GEOGRAPHY(), make_uniq(ArrowVariableSizeType::NORMAL)); 36 | } else if (format == "Z") { 37 | return make_uniq( 38 | Types::GEOGRAPHY(), 39 | make_uniq(ArrowVariableSizeType::SUPER_SIZE)); 40 | } else if (format == "vz") { 41 | return make_uniq( 42 | Types::GEOGRAPHY(), make_uniq(ArrowVariableSizeType::VIEW)); 43 | } else { 44 | throw InvalidInputException( 45 | "Arrow storage type \"%s\" not supported for geoarrow.wkb", format.c_str()); 46 | } 47 | } 48 | 49 | static void PopulateSchema(DuckDBArrowSchemaHolder& root_holder, ArrowSchema& schema, 50 | const LogicalType& type, ClientContext& context, 51 | const ArrowTypeExtension& extension) { 52 | // Should really use WithCrsLonLat() here, but DuckDB itself chokes on non key/value 53 | // metadata https://github.com/duckdb/duckdb/issues/16321 54 | auto data_type = geoarrow::Wkb() 55 | .WithEdgeType(GEOARROW_EDGE_TYPE_SPHERICAL) 56 | .WithCrs("OGC:CRS84", GEOARROW_CRS_TYPE_AUTHORITY_CODE); 57 | 58 | ArrowSchemaMetadata schema_metadata; 59 | schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_EXTENSION_NAME, 60 | data_type.extension_name()); 61 | schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_METADATA_KEY, 62 | data_type.extension_metadata()); 63 | root_holder.metadata_info.emplace_back(schema_metadata.SerializeMetadata()); 64 | schema.metadata = root_holder.metadata_info.back().get(); 65 | 66 | const auto options = context.GetClientProperties(); 67 | if (options.arrow_offset_size == ArrowOffsetSize::LARGE) { 68 | schema.format = "Z"; 69 | } else { 70 | schema.format = "z"; 71 | } 72 | } 73 | 74 | static void ArrowToDuck(ClientContext& context, Vector& source, Vector& result, 75 | idx_t count) { 76 | s2geography::geoarrow::ImportOptions options; 77 | options.set_check(false); 78 | options.set_oriented(true); 79 | ImportWKBToGeography(source, result, count, options); 80 | } 81 | 82 | static void DuckToArrow(ClientContext& context, Vector& source, Vector& result, 83 | idx_t count) { 84 | ExportGeographyToWKB(source, result, count); 85 | } 86 | }; 87 | 88 | void RegisterArrowExtensions(DBConfig& config) { 89 | config.RegisterArrowExtension( 90 | {"geoarrow.wkb", GeoArrowWKB::PopulateSchema, GeoArrowWKB::GetType, 91 | make_shared_ptr(Types::GEOGRAPHY(), LogicalType::BLOB, 92 | GeoArrowWKB::ArrowToDuck, 93 | GeoArrowWKB::DuckToArrow)}); 94 | } 95 | 96 | class GeoArrowRegisterFunctionData final : public TableFunctionData { 97 | public: 98 | GeoArrowRegisterFunctionData() : finished(false) {} 99 | bool finished{false}; 100 | }; 101 | 102 | unique_ptr GeoArrowRegisterBind(ClientContext& context, 103 | TableFunctionBindInput& input, 104 | vector& return_types, 105 | vector& names) { 106 | names.push_back("registered"); 107 | return_types.push_back(LogicalType::BOOLEAN); 108 | return make_uniq(); 109 | } 110 | 111 | void GeoArrowRegisterScan(ClientContext& context,
TableFunctionInput& data_p, 112 | DataChunk& output) { 113 | auto& data = data_p.bind_data->CastNoConst<GeoArrowRegisterFunctionData>(); 114 | if (data.finished) { 115 | return; 116 | } 117 | 118 | DBConfig& config = DatabaseInstance::GetDatabase(context).config; 119 | if (config.HasArrowExtension(Types::GEOGRAPHY())) { 120 | output.SetValue(0, 0, false); 121 | } else { 122 | RegisterArrowExtensions(config); 123 | output.SetValue(0, 0, true); 124 | } 125 | 126 | output.SetCardinality(1); 127 | data.finished = true; 128 | } 129 | } // namespace 130 | 131 | void RegisterGeoArrowExtensions(DatabaseInstance& instance) { 132 | TableFunction register_func("s2_register_geoarrow_extensions", {}, GeoArrowRegisterScan, 133 | GeoArrowRegisterBind); 134 | ExtensionUtil::RegisterFunction(instance, register_func); 135 | } 136 | 137 | } // namespace duckdb_s2 138 | 139 | } // namespace duckdb 140 | -------------------------------------------------------------------------------- /src/s2_types.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "s2_types.hpp" 3 | 4 | #include "duckdb/common/types.hpp" 5 | #include "duckdb/main/database.hpp" 6 | #include "duckdb/main/extension_util.hpp" 7 | 8 | namespace duckdb { 9 | 10 | namespace duckdb_s2 { 11 | LogicalType Types::S2_CELL() { 12 | LogicalType type = LogicalType::UBIGINT; 13 | type.SetAlias("S2_CELL"); 14 | return type; 15 | } 16 | 17 | LogicalType Types::S2_CELL_UNION() { 18 | LogicalType type = LogicalType::LIST(S2_CELL()); 19 | type.SetAlias("S2_CELL_UNION"); 20 | return type; 21 | } 22 | 23 | LogicalType Types::S2_CELL_CENTER() { 24 | LogicalType type = LogicalType::UBIGINT; 25 | type.SetAlias("S2_CELL_CENTER"); 26 | return type; 27 | } 28 | 29 | LogicalType Types::GEOGRAPHY() { 30 | LogicalType type = LogicalType::BLOB; 31 | type.SetAlias("GEOGRAPHY"); 32 | return type; 33 | } 34 | 35 | LogicalType Types::S2_BOX() { 36 | LogicalType type = LogicalType::STRUCT({{"xmin", LogicalType::DOUBLE}, 37 | {"ymin", LogicalType::DOUBLE}, 38 | {"xmax", LogicalType::DOUBLE}, 39 | {"ymax", LogicalType::DOUBLE}}); 40 | type.SetAlias("S2_BOX"); 41 | return type; 42 | } 43 | 44 | void RegisterTypes(DatabaseInstance& instance) { 45 | ExtensionUtil::RegisterType(instance, "S2_CELL", Types::S2_CELL()); 46 | ExtensionUtil::RegisterType(instance, "S2_CELL_UNION", Types::S2_CELL_UNION()); 47 | ExtensionUtil::RegisterType(instance, "S2_CELL_CENTER", Types::S2_CELL_CENTER()); 48 | ExtensionUtil::RegisterType(instance, "GEOGRAPHY", Types::GEOGRAPHY()); 49 | ExtensionUtil::RegisterType(instance, "S2_BOX", Types::S2_BOX()); 50 | } 51 | 52 | } // namespace duckdb_s2 53 | } // namespace duckdb 54 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Testing this extension 2 | This directory contains all the tests for this extension. The `sql` directory holds tests that are written as [SQLLogicTests](https://duckdb.org/dev/sqllogictest/intro.html). DuckDB aims to have most of its tests in this format as SQL statements, so for this extension, this should probably be the goal too. 3 | 4 | The root makefile contains targets to build and run all of these tests.
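For illustration, here is a minimal sketch of a test in this format, mirroring cases that already live under `test/sql/` (for example `accessors.test`); the file name and description are placeholders:

```
# name: test/sql/example.test
# description: minimal example test for the geography extension
# group: [geography]

# Require statement will ensure this test is run with this extension loaded
require geography

query I
SELECT s2_isempty('POINT EMPTY'::GEOGRAPHY)
----
true
```

Each `query` block pairs a statement with the expected result below the `----` separator, and `statement error` blocks assert that a statement fails with the given message.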
To run the SQLLogicTests: 5 | ```bash 6 | make test 7 | ``` 8 | or 9 | ```bash 10 | make test_debug 11 | ``` 12 | -------------------------------------------------------------------------------- /test/python/conftest.py: -------------------------------------------------------------------------------- 1 | import glob 2 | from pathlib import Path 3 | 4 | import pytest 5 | import duckdb 6 | import warnings 7 | 8 | 9 | HERE = Path(__file__).parent 10 | 11 | 12 | def _install_dev_and_connect(): 13 | con = duckdb.connect(config={"allow_unsigned_extensions": True}) 14 | 15 | possible_builds = glob.glob( 16 | "build/**/geography/geography.duckdb_extension", 17 | recursive=True, 18 | root_dir=HERE.parent.parent, 19 | ) 20 | if possible_builds: 21 | con.install_extension(possible_builds[0], force_install=True) 22 | else: 23 | warnings.warn( 24 | "Can't find build directory for geography.duckdb_extension; skipping INSTALL" 25 | ) 26 | 27 | con.load_extension("geography") 28 | return con 29 | 30 | 31 | @pytest.fixture() 32 | def geoarrow_con(): 33 | con = _install_dev_and_connect() 34 | con.sql("""CALL s2_register_geoarrow_extensions()""") 35 | return con 36 | 37 | 38 | @pytest.fixture() 39 | def con(): 40 | return _install_dev_and_connect() 41 | -------------------------------------------------------------------------------- /test/python/requirements.txt: -------------------------------------------------------------------------------- 1 | duckdb 2 | pytest 3 | spherely 4 | numpy 5 | pyarrow 6 | -------------------------------------------------------------------------------- /test/python/test_geoarrow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import json 3 | 4 | import pyarrow as pa 5 | import pytest 6 | import geoarrow.pyarrow as ga 7 | 8 | import duckdb 9 | 10 | HERE = Path(__file__).parent 11 | 12 | 13 | def test_export_without_register(con): 14 | tab = con.sql("""SELECT s2_geogfromtext('POINT (0 1)') as geom;""").to_arrow_table() 15 | assert tab.schema.field("geom").metadata is None 16 | 17 | 18 | def test_basic_export(geoarrow_con): 19 | tab = geoarrow_con.sql( 20 | """SELECT s2_geogfromtext('POINT (0 1)') as geom;""" 21 | ).to_arrow_table() 22 | 23 | pa_type = tab["geom"].type 24 | assert isinstance(pa_type, ga.GeometryExtensionType) 25 | assert pa_type._extension_name == "geoarrow.wkb" 26 | assert pa_type.edge_type == ga.EdgeType.SPHERICAL 27 | params = json.loads(pa_type.__arrow_ext_serialize__()) 28 | assert params["edges"] == "spherical" 29 | assert params["crs"] == "OGC:CRS84" 30 | 31 | 32 | def test_basic_import(geoarrow_con): 33 | field = pa.field( 34 | "geometry", 35 | pa.binary(), 36 | metadata={ 37 | "ARROW:extension:name": "geoarrow.wkb", 38 | "ARROW:extension:metadata": '{"edges": "spherical"}', 39 | }, 40 | ) 41 | point_wkb = ( 42 | b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00$@\x00\x00\x00\x00\x00\x004@" 43 | ) 44 | schema = pa.schema([field]) 45 | geo_table = pa.table( 46 | [pa.array([point_wkb])], 47 | schema=schema, 48 | ) 49 | 50 | tab = geoarrow_con.sql( 51 | """SELECT s2_astext(geometry) as wkt FROM geo_table;""" 52 | ).to_arrow_table() 53 | assert tab["wkt"].to_pylist() == ["POINT (10 20)"] 54 | 55 | 56 | def test_reject_planar_edges(geoarrow_con): 57 | # Empty metadata 58 | bad_metadata = { 59 | "ARROW:extension:name": "geoarrow.wkb", 60 | "ARROW:extension:metadata": "", 61 | } 62 | field = pa.field("geometry", pa.binary(), metadata=bad_metadata) 63 | geo_table = pa.table([pa.array([], 
pa.binary())], schema=pa.schema([field])) 64 | with pytest.raises( 65 | duckdb.NotImplementedException, 66 | match="Can't import non-spherical edges as GEOGRAPHY", 67 | ): 68 | geoarrow_con.sql("""SELECT * from geo_table""") 69 | 70 | 71 | def test_roundtrip_countries(geoarrow_con): 72 | countries_file = HERE.parent.parent / "data" / "countries.tsv" 73 | geo_table = geoarrow_con.sql( 74 | f"""SELECT s2_geogfromtext(geog) as geog1 FROM '{countries_file}'""" 75 | ).to_arrow_table() 76 | geo_table2 = geoarrow_con.sql( 77 | """SELECT geog1 as geog2 FROM geo_table""" 78 | ).to_arrow_table() 79 | 80 | table_both = pa.table( 81 | [geo_table["geog1"], geo_table2["geog2"]], 82 | schema=pa.schema([geo_table.schema.field(0), geo_table2.schema.field(0)]), 83 | ) 84 | areas_equal = geoarrow_con.sql( 85 | """SELECT sum(abs(s2_area(geog1) - s2_area(geog2)) < 0.1)::BIGINT AS sum_eq FROM table_both""" 86 | ).to_arrow_table() 87 | assert areas_equal == pa.table({"sum_eq": [len(table_both)]}) 88 | 89 | 90 | def test_spherely_interop(geoarrow_con): 91 | import spherely 92 | import numpy as np 93 | 94 | geo_table = geoarrow_con.sql( 95 | """SELECT geog, s2_area(geog) as area FROM s2_data_countries()""" 96 | ).to_arrow_table() 97 | geogs = spherely.from_geoarrow(geo_table["geog"].chunk(0)) 98 | np.testing.assert_array_almost_equal( 99 | spherely.area(geogs), geo_table["area"].to_numpy(), 100 | decimal=1 101 | ) 102 | -------------------------------------------------------------------------------- /test/sql/accessors.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/accessors.test 2 | # description: test geography extension cell operations 3 | # group: [geography] 4 | 5 | # Require statement will ensure this test is run with this extension loaded 6 | require geography 7 | 8 | # IsEmpty 9 | query I 10 | SELECT s2_isempty('POINT EMPTY'::GEOGRAPHY) 11 | ---- 12 | true 13 | 14 | query I 15 | SELECT s2_isempty('POINT (0 1)'::GEOGRAPHY) 16 | ---- 17 | false 18 | 19 | # Validation 20 | query I 21 | SELECT s2_is_valid(s2_geogfromtext_novalidate('LINESTRING (0 0, 1 1)')); 22 | ---- 23 | true 24 | 25 | query I 26 | SELECT s2_is_valid(s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)')); 27 | ---- 28 | false 29 | 30 | query I 31 | SELECT s2_is_valid_reason(s2_geogfromtext_novalidate('LINESTRING (0 0, 1 1)')) = ''; 32 | ---- 33 | true 34 | 35 | query I 36 | SELECT s2_is_valid_reason(s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)')); 37 | ---- 38 | Vertices 0 and 1 are identical 39 | 40 | # Area 41 | query I 42 | SELECT s2_area('POINT EMPTY'::GEOGRAPHY) 43 | ---- 44 | 0 45 | 46 | query I 47 | SELECT s2_area('POINT (0 1)'::GEOGRAPHY) 48 | ---- 49 | 0 50 | 51 | query I 52 | SELECT s2_area('LINESTRING (0 1, 2 3)'::GEOGRAPHY) 53 | ---- 54 | 0 55 | 56 | query I 57 | SELECT s2_area('POLYGON ((0 0, 0 1, 1 0, 0 0))'::GEOGRAPHY).round() 58 | ---- 59 | 6182489131 60 | 61 | # Perimeter 62 | query I 63 | SELECT s2_perimeter('POINT EMPTY'::GEOGRAPHY) 64 | ---- 65 | 0 66 | 67 | query I 68 | SELECT s2_perimeter('POINT (0 1)'::GEOGRAPHY) 69 | ---- 70 | 0 71 | 72 | query I 73 | SELECT s2_perimeter('LINESTRING (0 1, 2 3)'::GEOGRAPHY) 74 | ---- 75 | 0 76 | 77 | query I 78 | SELECT s2_perimeter('POLYGON ((0 0, 0 1, 1 0, 0 0))'::GEOGRAPHY).round() 79 | ---- 80 | 379640 81 | 82 | # Length 83 | query I 84 | SELECT s2_length('POINT EMPTY'::GEOGRAPHY) 85 | ---- 86 | 0 87 | 88 | query I 89 | SELECT s2_length('POINT (0 1)'::GEOGRAPHY) 90 | ---- 91 | 0 92 | 93 | query I 94 | SELECT 
s2_length('POLYGON ((0 0, 0 1, 1 0, 0 0))'::GEOGRAPHY) 95 | ---- 96 | 0 97 | 98 | query I 99 | SELECT s2_length('LINESTRING (0 1, 2 3)'::GEOGRAPHY).round() 100 | ---- 101 | 314403 102 | 103 | query I 104 | SELECT s2_x('POINT EMPTY'::GEOGRAPHY) 105 | ---- 106 | NaN 107 | 108 | query I 109 | SELECT s2_x('LINESTRING (0 1, 1 2)'::GEOGRAPHY) 110 | ---- 111 | NaN 112 | 113 | query I 114 | SELECT s2_x('POINT (-64 45)'::GEOGRAPHY).round() 115 | ---- 116 | -64 117 | 118 | query I 119 | SELECT s2_y('POINT (-64 45)'::GEOGRAPHY).round() 120 | ---- 121 | 45 122 | 123 | query I 124 | SELECT s2_x('POINT (-64 45)'::GEOGRAPHY::S2_CELL_CENTER).round() 125 | ---- 126 | -64 127 | 128 | query I 129 | SELECT s2_y('POINT (-64 45)'::GEOGRAPHY::S2_CELL_CENTER).round() 130 | ---- 131 | 45 132 | 133 | # Dimension 134 | query I 135 | SELECT s2_dimension(s2_cellfromlonlat(-64, 45)::GEOGRAPHY); 136 | ---- 137 | 0 138 | 139 | query I 140 | SELECT s2_dimension('POINT (-64 45)'::GEOGRAPHY); 141 | ---- 142 | 0 143 | 144 | query I 145 | SELECT s2_dimension('LINESTRING (0 0, 1 1)'::GEOGRAPHY); 146 | ---- 147 | 1 148 | 149 | query I 150 | SELECT s2_dimension(s2_data_country('Canada')); 151 | ---- 152 | 2 153 | 154 | query I 155 | SELECT s2_dimension('GEOMETRYCOLLECTION EMPTY'); 156 | ---- 157 | -1 158 | 159 | query I 160 | SELECT s2_dimension('GEOMETRYCOLLECTION (POINT (0 1), LINESTRING (0 0, 1 1))'::GEOGRAPHY); 161 | ---- 162 | 1 163 | 164 | # Numpoints 165 | query I 166 | SELECT s2_num_points(s2_cellfromlonlat(-64, 45)::GEOGRAPHY); 167 | ---- 168 | 1 169 | 170 | query I 171 | SELECT s2_num_points('POINT (-64 45)'::GEOGRAPHY); 172 | ---- 173 | 1 174 | 175 | query I 176 | SELECT s2_num_points('LINESTRING (0 0, 1 1)'::GEOGRAPHY); 177 | ---- 178 | 2 179 | 180 | query I 181 | SELECT s2_num_points('GEOMETRYCOLLECTION EMPTY'); 182 | ---- 183 | 0 184 | -------------------------------------------------------------------------------- /test/sql/binary_index_ops.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/predicates.test 2 | # description: test geography extension cell operations 3 | # group: [geography] 4 | 5 | # Require statement will ensure this test is run with this extension loaded 6 | require geography 7 | 8 | # Check that empties are never preselected for intersection 9 | query I 10 | SELECT s2_mayintersect('POINT (-64 45)'::GEOGRAPHY, 'POINT (-64 45)'::GEOGRAPHY); 11 | ---- 12 | true 13 | 14 | query I 15 | SELECT s2_mayintersect('POINT (-64 45)'::GEOGRAPHY, 'POINT (-64 46)'::GEOGRAPHY); 16 | ---- 17 | false 18 | 19 | query I 20 | SELECT s2_mayintersect('POINT (-64 45)'::GEOGRAPHY, 'POINT EMPTY'::GEOGRAPHY); 21 | ---- 22 | false 23 | 24 | query I 25 | SELECT s2_mayintersect('POINT EMPTY'::GEOGRAPHY, 'POINT (-64 46)'::GEOGRAPHY); 26 | ---- 27 | false 28 | 29 | # Check that a true intersection can be detected 30 | query I 31 | SELECT s2_intersects(s2_data_city('Vancouver'), s2_data_country('Canada')); 32 | ---- 33 | true 34 | 35 | # Check that even when an item is preselected, the predicate is evaluated 36 | query I 37 | SELECT s2_mayintersect(s2_data_city('Chicago'), s2_data_country('Canada')); 38 | ---- 39 | true 40 | 41 | query I 42 | SELECT s2_intersects(s2_data_city('Chicago'), s2_data_country('Canada')); 43 | ---- 44 | false 45 | 46 | # Check argument order for containment 47 | # (point doesn't contain a polygon but polygon contains point) 48 | query I 49 | SELECT s2_contains(s2_data_city('Toronto'), s2_data_country('Canada')); 50 | ---- 51 | false 52 | 53 | 
query I 54 | SELECT s2_contains(s2_data_country('Canada'), s2_data_city('Toronto')); 55 | ---- 56 | true 57 | 58 | # Check equals operator 59 | query I 60 | SELECT s2_equals(s2_data_country('Canada'), s2_data_country('Canada')); 61 | ---- 62 | true 63 | 64 | # Intersection of two definitely disjoint geographies 65 | query I 66 | SELECT s2_intersection('POINT (-64 45)'::GEOGRAPHY, 'POINT (-64 46)'::GEOGRAPHY); 67 | ---- 68 | GEOMETRYCOLLECTION EMPTY 69 | 70 | # Intersection where rhs is EMPTY 71 | query I 72 | SELECT s2_intersection('POINT (-64 45)'::GEOGRAPHY, 'POINT EMPTY'::GEOGRAPHY).s2_format(6); 73 | ---- 74 | POINT (-64 45) 75 | 76 | # Intersection where lhs is EMPTY 77 | query I 78 | SELECT s2_intersection('POINT EMPTY'::GEOGRAPHY, 'POINT (-64 45)'::GEOGRAPHY).s2_format(6); 79 | ---- 80 | POINT (-64 45) 81 | 82 | # Intersection that must go through the S2BooleanOperation 83 | query I 84 | SELECT s2_intersection('POINT (-64 45)'::GEOGRAPHY, 'POINT (-64 45)'::GEOGRAPHY).s2_format(6); 85 | ---- 86 | POINT (-64 45) 87 | 88 | # Difference of two definitely disjoint geographies 89 | query I 90 | SELECT s2_difference('POINT (-64 45)'::GEOGRAPHY, 'POINT (-64 46)'::GEOGRAPHY).s2_format(6); 91 | ---- 92 | POINT (-64 45) 93 | 94 | # Difference where rhs is EMPTY 95 | query I 96 | SELECT s2_difference('POINT (-64 45)'::GEOGRAPHY, 'POINT EMPTY'::GEOGRAPHY).s2_format(6); 97 | ---- 98 | POINT (-64 45) 99 | 100 | # Difference where lhs is EMPTY 101 | query I 102 | SELECT s2_difference('POINT EMPTY'::GEOGRAPHY, 'POINT (-64 45)'::GEOGRAPHY); 103 | ---- 104 | GEOMETRYCOLLECTION EMPTY 105 | 106 | # Difference that must go through the S2BooleanOperation 107 | query I 108 | SELECT s2_difference('POINT (-64 45)'::GEOGRAPHY, 'POINT (-64 45)'::GEOGRAPHY).s2_format(6); 109 | ---- 110 | GEOMETRYCOLLECTION EMPTY 111 | 112 | # Union where rhs is EMPTY 113 | query I 114 | SELECT s2_union('POINT (-64 45)'::GEOGRAPHY, 'POINT EMPTY'::GEOGRAPHY).s2_format(6); 115 | ---- 116 | POINT (-64 45) 117 | 118 | # Union where lhs is EMPTY 119 | query I 120 | SELECT s2_union('POINT EMPTY'::GEOGRAPHY, 'POINT (-64 45)'::GEOGRAPHY).s2_format(6); 121 | ---- 122 | POINT (-64 45) 123 | 124 | # Union that must go through the S2BooleanOperation 125 | query I 126 | SELECT s2_union('POINT (-64 45)'::GEOGRAPHY, 'POINT (-64 46)'::GEOGRAPHY).s2_format(6); 127 | ---- 128 | MULTIPOINT ((-64 45), (-64 46)) 129 | 130 | # Within Distance 131 | 132 | # Normal (via S2ShapeIndex) 133 | query I 134 | SELECT s2_dwithin(s2_data_city('Vancouver'), s2_data_city('Toronto'), 3000000); 135 | ---- 136 | false 137 | 138 | query I 139 | SELECT s2_dwithin(s2_data_city('Vancouver'), s2_data_city('Toronto'), 4000000); 140 | ---- 141 | true 142 | 143 | # Points snapped to cell centers 144 | query I 145 | SELECT s2_dwithin(s2_data_city('Vancouver')::S2_CELL_CENTER, s2_data_city('Toronto')::S2_CELL_CENTER, 3000000); 146 | ---- 147 | false 148 | 149 | query I 150 | SELECT s2_dwithin(s2_data_city('Vancouver')::S2_CELL_CENTER, s2_data_city('Toronto')::S2_CELL_CENTER, 4000000); 151 | ---- 152 | true 153 | 154 | # Empty LHS 155 | query I 156 | SELECT s2_dwithin('POINT EMPTY'::GEOGRAPHY, s2_data_city('Toronto'), 'inf'); 157 | ---- 158 | false 159 | 160 | # Empty RHS 161 | query I 162 | SELECT s2_dwithin(s2_data_city('Toronto'), 'POINT EMPTY'::GEOGRAPHY, 'inf'); 163 | ---- 164 | false 165 | 166 | # Distance 167 | 168 | # Normal (via S2ShapeIndex) 169 | query I 170 | SELECT s2_distance(s2_data_city('Vancouver'), s2_data_city('Toronto')); 171 | ---- 172 | 3354018.3461295413 
173 | 174 | # Points snapped to cell centers 175 | query I 176 | SELECT s2_distance(s2_data_city('Vancouver')::S2_CELL_CENTER, s2_data_city('Toronto')::S2_CELL_CENTER); 177 | ---- 178 | 3354018.3501422736 179 | 180 | # Empty LHS 181 | query I 182 | SELECT s2_distance('POINT EMPTY'::GEOGRAPHY, s2_data_city('Toronto')); 183 | ---- 184 | inf 185 | 186 | # Empty RHS 187 | query I 188 | SELECT s2_distance(s2_data_city('Toronto'), 'POINT EMPTY'::GEOGRAPHY); 189 | ---- 190 | inf 191 | 192 | # Max Distance 193 | 194 | # Normal (via S2ShapeIndex) 195 | query I 196 | SELECT s2_max_distance(s2_data_city('Vancouver')::S2_CELL_CENTER, s2_data_city('Toronto')); 197 | ---- 198 | 3354018.3461295413 199 | 200 | # Points snapped to cell centers 201 | query I 202 | SELECT s2_max_distance(s2_data_city('Vancouver')::S2_CELL_CENTER, s2_data_city('Toronto')::S2_CELL_CENTER); 203 | ---- 204 | 3354018.3501422736 205 | 206 | # Empty LHS 207 | query I 208 | SELECT s2_max_distance('POINT EMPTY'::GEOGRAPHY, s2_data_city('Toronto')); 209 | ---- 210 | inf 211 | 212 | # Empty RHS 213 | query I 214 | SELECT s2_max_distance(s2_data_city('Toronto'), 'POINT EMPTY'::GEOGRAPHY); 215 | ---- 216 | inf 217 | 218 | # Make sure distance and max_distance are different 219 | query I 220 | SELECT s2_max_distance(s2_data_city('Vancouver'), s2_data_country('Fiji')) > s2_distance(s2_data_city('Vancouver'), s2_data_country('Fiji')) 221 | ---- 222 | true 223 | -------------------------------------------------------------------------------- /test/sql/bounds.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/bounds.test 2 | # description: test geography extension cell operations 3 | # group: [geography] 4 | 5 | # Require statement will ensure this test is run with this extension loaded 6 | require geography 7 | 8 | query I 9 | SELECT s2_covering(s2_data_country('Fiji')); 10 | ---- 11 | [3/13002011, 3/1300232, 3/130030, 3/130031, 3/130033, 3/130100, 3/2032333, 3/20330000000] 12 | 13 | query I 14 | SELECT s2_covering_fixed_level(s2_data_country('Fiji'), 5); 15 | ---- 16 | [3/13002, 3/13003, 3/13010, 3/20323, 3/20330] 17 | 18 | # Check optimization for the cell center geography 19 | query I 20 | SELECT s2_covering('POINT (-64 45)'::GEOGRAPHY::S2_CELL_CENTER); 21 | ---- 22 | [2/112230310012123001312232330210] 23 | 24 | query I 25 | SELECT s2_covering_fixed_level('POINT (-64 45)'::GEOGRAPHY::S2_CELL_CENTER, 5); 26 | ---- 27 | [2/11223] 28 | 29 | statement error 30 | SELECT s2_covering_fixed_level(geog, UNNEST([1, 2])) from s2_data_countries(); 31 | ---- 32 | Invalid Input Error: s2_covering_fixed_level(): level must be a constant 33 | 34 | # s2_bounds_box() 35 | # Check empty input optimization 36 | query I 37 | SELECT s2_bounds_box('POINT EMPTY'::GEOGRAPHY); 38 | ---- 39 | {'xmin': 180.0, 'ymin': 57.29577951308232, 'xmax': -180.0, 'ymax': 0.0} 40 | 41 | # Check point-as-cell-center optimization 42 | query I 43 | SELECT s2_cellfromlonlat(-64, 45).s2_bounds_box(); 44 | ---- 45 | {'xmin': -63.99999997805, 'ymin': 45.0000000116166, 'xmax': -63.99999997805, 'ymax': 45.0000000116166} 46 | 47 | # Check normal encoded geography 48 | query I 49 | SELECT s2_bounds_box('MULTIPOINT (0 1, 2 3)'::GEOGRAPHY); 50 | ---- 51 | {'xmin': 0.0, 'ymin': 1.0, 'xmax': 1.9999999999999996, 'ymax': 3.0000000000000004} 52 | 53 | # s2_bounds_box_agg() 54 | 55 | # Check empty input optimization 56 | query I 57 | SELECT s2_bounds_box_agg('POINT EMPTY'::GEOGRAPHY); 58 | ---- 59 | {'xmin': 180.0, 'ymin': 57.29577951308232, 
'xmax': -180.0, 'ymax': 0.0} 60 | 61 | # Check point-as-cell-center optimization 62 | query I 63 | SELECT s2_bounds_box_agg(s2_cellfromlonlat(-64, 45)); 64 | ---- 65 | {'xmin': -63.99999997805, 'ymin': 45.0000000116166, 'xmax': -63.99999997805, 'ymax': 45.0000000116166} 66 | 67 | # Check normal encoded geography 68 | query I 69 | SELECT s2_bounds_box_agg('MULTIPOINT (0 1, 2 3)'::GEOGRAPHY); 70 | ---- 71 | {'xmin': 0.0, 'ymin': 1.0, 'xmax': 1.9999999999999996, 'ymax': 3.0000000000000004} 72 | 73 | # With some actual aggregation 74 | query I 75 | SELECT s2_bounds_box_agg(geog) FROM s2_data_cities(); 76 | ---- 77 | {'xmin': -123.12359, 'ymin': -41.29998789999999, 'xmax': -171.738642, 'ymax': 64.1500236} 78 | 79 | query I 80 | SELECT s2_bounds_box_agg(geog) FROM s2_data_countries(); 81 | ---- 82 | {'xmin': -180.0, 'ymin': -90.0, 'xmax': 180.0, 'ymax': 83.64513000000002} 83 | 84 | 85 | # Test the box exporters 86 | query I 87 | SELECT s2_bounds_box(s2_data_country('Germany')).s2_box_wkb().s2_geogfromwkb().s2_format(4); 88 | ---- 89 | POLYGON ((5.9887 47.3025, 15.017 47.3025, 15.017 54.9831, 5.9887 54.9831, 5.9887 47.3025)) 90 | 91 | query I 92 | SELECT s2_bounds_box(s2_data_country('Fiji')).s2_box_wkb().s2_geogfromwkb().s2_format(4); 93 | ---- 94 | MULTIPOLYGON (((177.285 -18.288, 180 -18.288, 180 -16.0209, 177.285 -16.0209, 177.285 -18.288)), ((-180 -18.288, -179.7933 -18.288, -179.7933 -16.0209, -180 -16.0209, -180 -18.288))) 95 | 96 | query I 97 | SELECT s2_bounds_box(s2_data_country('Germany')).s2_box_struct(); 98 | ---- 99 | {'xmin': 5.988658, 'ymin': 47.30248799999997, 'xmax': 15.016996000000002, 'ymax': 54.983104000000026} 100 | 101 | # Test box constructor 102 | query I 103 | SELECT s2_box(5.989, 47.302, 15.017, 54.983); 104 | ---- 105 | {'xmin': 5.989, 'ymin': 47.302, 'xmax': 15.017, 'ymax': 54.983} 106 | 107 | # s2_box_intersects() 108 | query I 109 | SELECT s2_box_intersects(s2_bounds_box(s2_data_country('Germany')), s2_bounds_box(s2_data_country('France'))); 110 | ---- 111 | true 112 | 113 | query I 114 | SELECT s2_box_intersects(s2_bounds_box(s2_data_country('Germany')), s2_bounds_box(s2_data_country('Canada'))); 115 | ---- 116 | false 117 | 118 | # s2_box_union() 119 | query I 120 | SELECT s2_box_union(s2_box(0, 1, 2, 3), s2_box(4, 5, 6, 7)); 121 | ---- 122 | {'xmin': 0.0, 'ymin': 1.0, 'xmax': 6.000000000000001, 'ymax': 7.0} 123 | 124 | query I 125 | SELECT s2_box_union(s2_box(179, 1, 180, 3), s2_box(-180, 5, -179, 7)); 126 | ---- 127 | {'xmin': 179.0, 'ymin': 1.0, 'xmax': -179.0, 'ymax': 7.0} 128 | -------------------------------------------------------------------------------- /test/sql/cell_ops.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/cell_ops.test 2 | # description: test geography extension cell operations 3 | # group: [geography] 4 | 5 | # Require statement will ensure this test is run with this extension loaded 6 | require geography 7 | 8 | # Cell/center casts 9 | query I 10 | SELECT '2/112230310012123001312232330210'::S2_CELL_CENTER::S2_CELL; 11 | ---- 12 | 2/112230310012123001312232330210 13 | 14 | query I 15 | SELECT '2/112230310012123001312232330210'::S2_CELL::S2_CELL_CENTER; 16 | ---- 17 | 2/112230310012123001312232330210 18 | 19 | query I 20 | SELECT 'POINT (-64 45)'::GEOGRAPHY::S2_CELL_CENTER; 21 | ---- 22 | 2/112230310012123001312232330210 23 | 24 | # Cell union casts 25 | query I 26 | SELECT '2/'::S2_CELL::S2_CELL_UNION 27 | ---- 28 | [2/] 29 | 30 | query I 31 | SELECT 
'invalid'::S2_CELL::S2_CELL_UNION 32 | ---- 33 | [] 34 | 35 | # Geography casts 36 | query I 37 | SELECT ('2/112230310012123001312232330210'::S2_CELL_CENTER::GEOGRAPHY).s2_format(6); 38 | ---- 39 | POINT (-64 45) 40 | 41 | # Should this be NULL or error? 42 | query I 43 | SELECT 'not valid'::S2_CELL_CENTER::GEOGRAPHY; 44 | ---- 45 | POINT EMPTY 46 | 47 | query I 48 | SELECT 'POINT (-64 45)'::GEOGRAPHY::S2_CELL_CENTER; 49 | ---- 50 | 2/112230310012123001312232330210 51 | 52 | statement error 53 | SELECT 'LINESTRING (-64 45, 0 0)'::GEOGRAPHY::S2_CELL_CENTER::VARCHAR; 54 | ---- 55 | Invalid Input Error: Can't convert geography that is not empty nor a single point to S2_CELL_CENTER 56 | 57 | query I 58 | SELECT ('2/0'::S2_CELL::S2_CELL_UNION::GEOGRAPHY).s2_format(6); 59 | ---- 60 | POLYGON ((45 35.26439, 90 45, 0 90, 0 45, 45 35.26439)) 61 | 62 | # Cell union from storage gets sorted on the way in 63 | 64 | # (Except this cast, which isn't called even though it's registered) 65 | #query I 66 | #SELECT ['1/'::S2_CELL, '0/'::S2_CELL]::S2_CELL_UNION; 67 | #---- 68 | #[0/, 1/] 69 | 70 | query I 71 | SELECT ['1/'::S2_CELL::UBIGINT, '0/'::S2_CELL::UBIGINT]::S2_CELL_UNION; 72 | ---- 73 | [0/, 1/] 74 | 75 | query I 76 | SELECT ['1/'::S2_CELL::BIGINT, '0/'::S2_CELL::BIGINT]::S2_CELL_UNION; 77 | ---- 78 | [0/, 1/] 79 | 80 | 81 | # cell from lon/lat 82 | query I 83 | SELECT s2_cellfromlonlat(-64, 45) 84 | ---- 85 | 2/112230310012123001312232330210 86 | 87 | query I 88 | SELECT s2_cellfromlonlat('NaN', 'NaN') 89 | ---- 90 | Invalid: ffffffffffffffff 91 | 92 | # Special-cased WKB reader for cell center (should work for anything 93 | # with exactly one valid point) 94 | query I 95 | SELECT s2_aswkb('POINT (-64 45)'::GEOGRAPHY).s2_cellfromwkb() 96 | ---- 97 | 2/112230310012123001312232330210 98 | 99 | query I 100 | SELECT s2_aswkb('GEOMETRYCOLLECTION(POINT (-64 45))'::GEOGRAPHY).s2_cellfromwkb() 101 | ---- 102 | 2/112230310012123001312232330210 103 | 104 | query I 105 | SELECT s2_aswkb('GEOMETRYCOLLECTION(POLYGON EMPTY, POINT (-64 45))'::GEOGRAPHY).s2_cellfromwkb() 106 | ---- 107 | 2/112230310012123001312232330210 108 | 109 | # Also should work for empty things 110 | query I 111 | SELECT s2_aswkb('POINT EMPTY'::GEOGRAPHY).s2_cellfromwkb() 112 | ---- 113 | Invalid: ffffffffffffffff 114 | 115 | query I 116 | SELECT s2_aswkb('LINESTRING EMPTY'::GEOGRAPHY).s2_cellfromwkb() 117 | ---- 118 | Invalid: ffffffffffffffff 119 | 120 | query I 121 | SELECT s2_aswkb('POLYGON EMPTY'::GEOGRAPHY).s2_cellfromwkb() 122 | ---- 123 | Invalid: ffffffffffffffff 124 | 125 | query I 126 | SELECT s2_aswkb('GEOMETRYCOLLECTION (POINT EMPTY, LINESTRING EMPTY, POLYGON EMPTY)'::GEOGRAPHY).s2_cellfromwkb() 127 | ---- 128 | Invalid: ffffffffffffffff 129 | 130 | # ...but error for non-empty things that aren't exactly one point 131 | statement error 132 | SELECT s2_aswkb('MULTIPOINT (-64 45, 0 0)'::GEOGRAPHY).s2_cellfromwkb() 133 | ---- 134 | Invalid Input Error: Can't parse WKB with more than one point to S2_CELL_CENTER 135 | 136 | statement error 137 | SELECT s2_aswkb('LINESTRING (-64 45, 0 0)'::GEOGRAPHY).s2_cellfromwkb() 138 | ---- 139 | Invalid Input Error: Can't parse WKB with non-point input to S2_CELL_CENTER 140 | 141 | # Getting an arbitrary cell from WKB input should work for any type but 142 | # never parse past the first point 143 | query I 144 | SELECT s2_aswkb('LINESTRING (-64 45, 0 0)'::GEOGRAPHY).s2_arbitrarycellfromwkb() 145 | ---- 146 | 2/112230310012123001312232330210 147 | 148 | query I 149 | SELECT s2_aswkb('POLYGON ((-64 45, 0
0, 0 1, 0 2))'::GEOGRAPHY).s2_arbitrarycellfromwkb() 150 | ---- 151 | 2/112230310012123001312232330210 152 | 153 | query I 154 | SELECT s2_aswkb('GEOMETRYCOLLECTION(POLYGON EMPTY, LINESTRING (-64 45, 0 0))'::GEOGRAPHY).s2_arbitrarycellfromwkb() 155 | ---- 156 | 2/112230310012123001312232330210 157 | 158 | query I 159 | SELECT s2_aswkb('GEOMETRYCOLLECTION (POINT EMPTY, LINESTRING EMPTY, POLYGON EMPTY)'::GEOGRAPHY).s2_arbitrarycellfromwkb() 160 | ---- 161 | Invalid: ffffffffffffffff 162 | 163 | # Cast cell to geography 164 | query I 165 | SELECT ('2/1122303'::S2_CELL::GEOGRAPHY).s2_format(6); 166 | ---- 167 | POLYGON ((-63.470273 44.898834, -64.286611 45.098989, -64.779066 44.581998, -63.973842 44.388923, -63.470273 44.898834)) 168 | 169 | # Cell token string representation 170 | query I 171 | SELECT s2_cell_token('2/112230310012123001312232330210'::S2_CELL); 172 | ---- 173 | 4b59a0cd83b5de49 174 | 175 | query I 176 | SELECT s2_cell_from_token('4b59a0cd83b5de49'); 177 | ---- 178 | 2/112230310012123001312232330210 179 | 180 | # Cell level 181 | # Should this be NULL or error? 182 | query I 183 | SELECT s2_cell_from_token('foofy')::VARCHAR; 184 | ---- 185 | Invalid: 0000000000000000 186 | 187 | # Cell ops 188 | query I 189 | SELECT s2_cell_level('2/112230310012123001312232330210'::S2_CELL); 190 | ---- 191 | 30 192 | 193 | # Not sure if this should error or not (ffffffffffffffff can be used in indexes) 194 | query I 195 | SELECT s2_cell_level('not valid'::S2_CELL); 196 | ---- 197 | -1 198 | 199 | # Cell parent/child/neighbor 200 | query I 201 | SELECT ('2/112230310012123001312232330210'::S2_CELL).s2_cell_parent(0) 202 | ---- 203 | 2/ 204 | 205 | query I 206 | SELECT ('2/112230310012123001312232330210'::S2_CELL).s2_cell_parent(-30) 207 | ---- 208 | 2/ 209 | 210 | query I 211 | SELECT ('2/'::S2_CELL).s2_cell_parent(1); 212 | ---- 213 | Invalid: ffffffffffffffff 214 | 215 | query I 216 | SELECT ('2/'::S2_CELL).s2_cell_child(0); 217 | ---- 218 | 2/0 219 | 220 | query I 221 | SELECT ('2/'::S2_CELL).s2_cell_child(-1); 222 | ---- 223 | Invalid: ffffffffffffffff 224 | 225 | query I 226 | SELECT ('2/'::S2_CELL).s2_cell_edge_neighbor(0); 227 | ---- 228 | 1/ 229 | 230 | query I 231 | SELECT ('2/'::S2_CELL).s2_cell_edge_neighbor(-1); 232 | ---- 233 | Invalid: ffffffffffffffff 234 | 235 | # Min/max ranges 236 | query I 237 | SELECT ('2/'::S2_CELL).s2_cell_range_min() 238 | ---- 239 | 2/000000000000000000000000000000 240 | 241 | query I 242 | SELECT ('2/'::S2_CELL).s2_cell_range_max() 243 | ---- 244 | 2/333333333333333333333333333333 245 | 246 | query I 247 | SELECT ('foofy'::S2_CELL).s2_cell_range_min() 248 | ---- 249 | Invalid: ffffffffffffffff 250 | 251 | query I 252 | SELECT ('foofy'::S2_CELL).s2_cell_range_max() 253 | ---- 254 | Invalid: ffffffffffffffff 255 | 256 | # Cell predicates 257 | query I 258 | SELECT s2_cell_contains('2/'::S2_CELL, '2/0'::S2_CELL); 259 | ---- 260 | true 261 | 262 | query I 263 | SELECT s2_cell_contains('2/0'::S2_CELL, '2/'::S2_CELL); 264 | ---- 265 | false 266 | 267 | query I 268 | SELECT s2_cell_contains('invalid'::S2_CELL, '2/'::S2_CELL); 269 | ---- 270 | false 271 | 272 | query I 273 | SELECT s2_cell_contains('2/'::S2_CELL, 'invalid'::S2_CELL); 274 | ---- 275 | false 276 | 277 | query I 278 | SELECT s2_cell_intersects('2/'::S2_CELL, '2/0'::S2_CELL); 279 | ---- 280 | true 281 | 282 | query I 283 | SELECT s2_cell_intersects('2/0'::S2_CELL, '2/'::S2_CELL); 284 | ---- 285 | true 286 | 287 | query I 288 | SELECT s2_cell_intersects('2/'::S2_CELL, '3/'::S2_CELL); 289 | ---- 290 | 
false 291 | 292 | query I 293 | SELECT s2_cell_intersects('invalid'::S2_CELL, '2/'::S2_CELL); 294 | ---- 295 | false 296 | 297 | query I 298 | SELECT s2_cell_intersects('2/'::S2_CELL, 'invalid'::S2_CELL); 299 | ---- 300 | false 301 | 302 | # Sanity check of a few of these concept using the example data 303 | query I 304 | SELECT sum((s2_cellfromlonlat(s2_x(geog), s2_y(geog))::S2_CELL).s2_intersects(geog)::INTEGER) FROM s2_data_cities(); 305 | ---- 306 | 243 307 | 308 | query I 309 | SELECT sum((s2_arbitrarycellfromwkb(geog.s2_aswkb())::S2_CELL).s2_intersects(geog)::INTEGER) FROM s2_data_countries(); 310 | ---- 311 | 177 312 | 313 | query I 314 | SELECT sum((s2_cellfromwkb(geog.s2_aswkb())::S2_CELL).s2_intersects(geog)::INTEGER) FROM s2_data_cities(); 315 | ---- 316 | 243 317 | -------------------------------------------------------------------------------- /test/sql/data.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/functions_io.test 2 | # description: test geography extension cell operations 3 | # group: [geography] 4 | 5 | # Require statement will ensure this test is run with this extension loaded 6 | require geography 7 | 8 | statement error 9 | SELECT s2_data_city('Halifax'); 10 | ---- 11 | Invalid Input Error: No entry for item 'Halifax' 12 | 13 | query I 14 | SELECT s2_data_city('Vancouver').s2_format(6); 15 | ---- 16 | POINT (-123.12359 49.275362) 17 | 18 | # Check that the content of cities.tsv is the same as the content of s2_data_cities() 19 | statement ok 20 | CREATE TABLE cities_tsv AS SELECT name, population, geog::GEOGRAPHY as geog FROM '__WORKING_DIRECTORY__/data/cities.tsv'; 21 | 22 | query I 23 | SELECT count(*) from cities_tsv; 24 | ---- 25 | 243 26 | 27 | query I 28 | SELECT sum((cities_tsv.geog.s2_format(9) = cities.geog.s2_format(9))::INTEGER) FROM cities_tsv INNER JOIN s2_data_cities() as cities ON cities_tsv.name = cities.name; 29 | ---- 30 | 243 31 | 32 | # Check that the content of countries.tsv is the same as the content of s2_data_countries() 33 | statement ok 34 | CREATE TABLE countries_tsv AS SELECT name, continent, geog::GEOGRAPHY as geog FROM '__WORKING_DIRECTORY__/data/countries.tsv'; 35 | 36 | query I 37 | SELECT count(*) from countries_tsv; 38 | ---- 39 | 177 40 | 41 | query I 42 | SELECT sum((countries_tsv.geog.s2_format(9) = countries.geog.s2_format(9))::INTEGER) FROM countries_tsv INNER JOIN s2_data_countries() as countries ON countries_tsv.name = countries.name; 43 | ---- 44 | 177 45 | -------------------------------------------------------------------------------- /test/sql/extension.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/geography.test 2 | # description: test geography extension 3 | # group: [geography] 4 | 5 | # Before we load the extension, this will fail 6 | statement error 7 | SELECT * from s2_dependencies() 8 | ---- 9 | Catalog Error: Table Function with name s2_dependencies does not exist! 
10 | 11 | # Require statement will ensure this test is run with this extension loaded 12 | require geography 13 | 14 | query I 15 | SELECT dependency FROM s2_dependencies() ORDER BY dependency 16 | ---- 17 | abseil-cpp 18 | geoarrow 19 | nanoarrow 20 | openssl 21 | s2geometry 22 | -------------------------------------------------------------------------------- /test/sql/functions_io.test: -------------------------------------------------------------------------------- 1 | # name: test/sql/functions_io.test 2 | # description: test geography extension cell operations 3 | # group: [geography] 4 | 5 | # Require statement will ensure this test is run with this extension loaded 6 | require geography 7 | 8 | # WKT roundtrip 9 | query I 10 | SELECT ('POINT (-64 45)'::GEOGRAPHY).s2_format(6) 11 | ---- 12 | POINT (-64 45) 13 | 14 | query I 15 | SELECT s2_geogfromtext('POINT (-64 45)').s2_format(6) 16 | ---- 17 | POINT (-64 45) 18 | 19 | query I 20 | SELECT s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)').s2_format(6) 21 | ---- 22 | LINESTRING (0 0, 0 0, 1 1) 23 | 24 | # WKB parse 25 | query I 26 | SELECT s2_geogfromwkb('\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3e\x40\x00\x00\x00\x00\x00\x00\x24\x40').s2_format(6); 27 | ---- 28 | POINT (30 10) 29 | 30 | # WKB roundtrip 31 | query I 32 | SELECT ('POINT (-64 45)'::GEOGRAPHY).s2_aswkb().s2_geogfromwkb().s2_format(6) 33 | ---- 34 | POINT (-64 45) 35 | 36 | # Read WKB skipping validation 37 | query I 38 | SELECT s2_geogfromwkb_novalidate(s2_geogfromtext_novalidate('LINESTRING (0 0, 0 0, 1 1)').s2_aswkb()).s2_format(6); 39 | ---- 40 | LINESTRING (0 0, 0 0, 1 1) 41 | 42 | # Prepare of small geographies doesn't trigger an index 43 | query I 44 | SELECT ('POINT (30 10)'::GEOGRAPHY).s2_prepare().s2_format(6); 45 | ---- 46 | POINT (30 10) 47 | 48 | # Prepare of bigger geographies should trigger an index 49 | query I 50 | SELECT ('LINESTRING (0 0, 1 1, 2 2, 3 3, 4 4)'::GEOGRAPHY).s2_prepare(); 51 | ---- 52 | 53 | -------------------------------------------------------------------------------- /test/sql/geoarrow.test: -------------------------------------------------------------------------------- 1 | require geography 2 | 3 | query I 4 | SELECT * FROM s2_register_geoarrow_extensions(); 5 | ---- 6 | true 7 | -------------------------------------------------------------------------------- /test_local.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Cheap version of make test that works with cmake 4 | SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | SOURCE_DIR_NAME="$(basename "${SOURCE_DIR}")" 6 | 7 | "$SOURCE_DIR/build/test/unittest" "*/${SOURCE_DIR_NAME}/*" 8 | -------------------------------------------------------------------------------- /vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | "openssl", 4 | {"name": "abseil", "features": ["cxx17"]} 5 | ], 6 | "vcpkg-configuration": { 7 | "overlay-ports": [ 8 | "./vcpkg_ports" 9 | ] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /vcpkg_ports/abseil/portfile.cmake: -------------------------------------------------------------------------------- 1 | if(NOT VCPKG_TARGET_IS_WINDOWS) 2 | vcpkg_check_linkage(ONLY_STATIC_LIBRARY) 3 | endif() 4 | 5 | if(WIN32 AND NOT MSVC) 6 | set(EXTRA_MINGW_OPTIONS "-DCMAKE_CXX_FLAGS=-DABSL_FORCE_WAITER_MODE=4") 7 | endif() 8 | 9 | vcpkg_from_github( 10 | 
OUT_SOURCE_PATH 11 | SOURCE_PATH 12 | REPO 13 | abseil/abseil-cpp 14 | REF 15 | "${VERSION}" 16 | SHA512 17 | 14390380655c41483a98487e3b012110dd8d1743fdd68d8cde7e0d7c2730312d564b15726d8c9d2fff237d2fce3983bbbb5213f59612c7c6feaeb402dff9609f 18 | HEAD_REF 19 | master) 20 | 21 | vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS FEATURES cxx17 22 | ABSL_USE_CXX17) 23 | 24 | # With ABSL_PROPAGATE_CXX_STD=ON abseil automatically detect if it is being 25 | # compiled with C++14 or C++17, and modifies the installed `absl/base/options.h` 26 | # header accordingly. This works even if CMAKE_CXX_STANDARD is not set. Abseil 27 | # uses the compiler default behavior to update `absl/base/options.h` as needed. 28 | if(ABSL_USE_CXX17) 29 | set(ABSL_USE_CXX17_OPTION "-DCMAKE_CXX_STANDARD=17") 30 | endif() 31 | 32 | vcpkg_cmake_configure( 33 | SOURCE_PATH 34 | "${SOURCE_PATH}" 35 | DISABLE_PARALLEL_CONFIGURE 36 | OPTIONS 37 | -DABSL_PROPAGATE_CXX_STD=ON 38 | ${ABSL_USE_CXX17_OPTION} 39 | ${EXTRA_MINGW_OPTIONS}) 40 | 41 | vcpkg_cmake_install() 42 | vcpkg_cmake_config_fixup(PACKAGE_NAME absl CONFIG_PATH lib/cmake/absl) 43 | vcpkg_fixup_pkgconfig() 44 | 45 | vcpkg_copy_pdbs() 46 | file( 47 | REMOVE_RECURSE 48 | "${CURRENT_PACKAGES_DIR}/debug/share" 49 | "${CURRENT_PACKAGES_DIR}/debug/include" 50 | "${CURRENT_PACKAGES_DIR}/include/absl/copts" 51 | "${CURRENT_PACKAGES_DIR}/include/absl/strings/testdata" 52 | "${CURRENT_PACKAGES_DIR}/include/absl/time/internal/cctz/testdata") 53 | 54 | if(VCPKG_LIBRARY_LINKAGE STREQUAL "dynamic") 55 | file(GLOB_RECURSE headers "${CURRENT_PACKAGES_DIR}/include/absl/*.h") 56 | foreach(header IN LISTS ${headers}) 57 | vcpkg_replace_string("${header}" "!defined(ABSL_CONSUME_DLL)" "0") 58 | vcpkg_replace_string("${header}" "defined(ABSL_CONSUME_DLL)" "1") 59 | endforeach() 60 | endif() 61 | 62 | vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") 63 | -------------------------------------------------------------------------------- /vcpkg_ports/abseil/vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "abseil", 3 | "version": "20230802.1", 4 | "description": [ 5 | "an open-source collection designed to augment the C++ standard library.", 6 | "Abseil is an open-source collection of C++ library code designed to augment the C++ standard library. The Abseil library code is collected from Google's own C++ code base, has been extensively tested and used in production, and is the same code we depend on in our daily coding lives.", 7 | "In some cases, Abseil provides pieces missing from the C++ standard; in others, Abseil provides alternatives to the standard for special needs we've found through usage in the Google code base. We denote those cases clearly within the library code we provide you.", 8 | "Abseil is not meant to be a competitor to the standard library; we've just found that many of these utilities serve a purpose within our code base, and we now want to provide those resources to the C++ community as a whole." 9 | ], 10 | "homepage": "https://github.com/abseil/abseil-cpp", 11 | "license": "Apache-2.0", 12 | "dependencies": [ 13 | { 14 | "name": "vcpkg-cmake", 15 | "host": true 16 | }, 17 | { 18 | "name": "vcpkg-cmake-config", 19 | "host": true 20 | } 21 | ], 22 | "features": { 23 | "cxx17": { 24 | "description": "Enable compiler C++17." 25 | } 26 | } 27 | } 28 | --------------------------------------------------------------------------------