├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── geo_lib.rs ├── geoarrow.rs ├── geos_lib.rs └── util.rs ├── scripts └── setup │ ├── README.md │ ├── pixi.lock │ └── pixi.toml └── src ├── function ├── as_ewkt.rs ├── as_geojson.rs ├── as_mvt_geom.rs ├── as_text.rs ├── boundary.rs ├── box2d.rs ├── buffer.rs ├── covered_by.rs ├── covers.rs ├── equals.rs ├── extent.rs ├── geom_from_text.rs ├── geom_from_wkb.rs ├── geometry_type.rs ├── intersects.rs ├── make_envelope.rs ├── mod.rs ├── split.rs ├── srid.rs └── translate.rs ├── geo ├── array.rs ├── box.rs ├── builder.rs ├── dialect.rs ├── index.rs └── mod.rs └── lib.rs /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | 9 | jobs: 10 | lint-test: 11 | name: Lint and Test 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | with: 16 | submodules: 'recursive' 17 | 18 | - name: Install Rust 19 | uses: dtolnay/rust-toolchain@stable 20 | with: 21 | components: rustfmt, clippy 22 | 23 | - uses: Swatinem/rust-cache@v2 24 | 25 | - name: Install Pixi 26 | run: | 27 | curl -fsSL https://pixi.sh/install.sh | bash 28 | echo "$HOME/.pixi/bin" >> $GITHUB_PATH 29 | echo "GDAL_HOME=$(pwd)/scripts/setup/.pixi/envs/default" >> "$GITHUB_ENV" 30 | echo "LD_LIBRARY_PATH=$(pwd)/scripts/setup/.pixi/envs/default/lib" >> "$GITHUB_ENV" 31 | echo "GEOS_LIB_DIR=$(pwd)/scripts/setup/.pixi/envs/default/lib" >> "$GITHUB_ENV" 32 | echo "GEOS_VERSION=3.12.1" >> "$GITHUB_ENV" 33 | echo "PKG_CONFIG_PATH=$(pwd)/scripts/setup/.pixi/envs/default/lib/pkgconfig" >> "$GITHUB_ENV" 34 | 35 | - name: Install build requirements 36 | run: | 37 | cd scripts/setup 38 | pixi install 39 | 40 | - name: Run cargo fmt 41 | run: cargo fmt --all -- --check 42 | 43 | - name: Run cargo clippy 44 | run: cargo clippy --all --all-features 
--tests -- -D warnings 45 | 46 | - name: Run cargo test 47 | run: cargo test --all 48 | 49 | - name: Run cargo test with all features 50 | run: cargo test --all --all-features 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .idea 3 | .vscode 4 | .pixi -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "datafusion-geo" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["lewiszlw "] 6 | description = "Add geo functionality extension to datafusion query engine." 7 | license = "MIT" 8 | repository = "https://github.com/systemxlabs/datafusion-geo" 9 | homepage = "https://github.com/systemxlabs/datafusion-geo" 10 | readme = "README.md" 11 | 12 | [features] 13 | geos = ["dep:geos", "geozero/with-geos"] 14 | 15 | [dependencies] 16 | arrow-schema = "50" 17 | arrow-array = "50" 18 | arrow-buffer = "50" 19 | datafusion-common = "36" 20 | datafusion-expr = "36" 21 | geo = "0.28" 22 | geos = { version = "8.3", features = ["v3_10_0", "geo"], optional = true } 23 | #geozero = { version = "0.12", features = ["with-wkb"] } 24 | geozero = { git = "https://github.com/georust/geozero.git", rev = "3378dda305ec88cabb092d458f8a61a140f60827", features = ["with-wkb"] } 25 | rayon = "1.9" 26 | rstar = "0.12.0" 27 | 28 | [dev-dependencies] 29 | arrow = "50" 30 | datafusion = "36" 31 | tokio = { version = "1.36", features = ["full"] } 32 | criterion = { version = "0.5.1", features = ["async_tokio"] } 33 | geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs.git", rev = "0e4473e546248d2c2cbfb44df76d508660761261" } 34 | 35 | [[bench]] 36 | name = "geo_lib" 37 | path = "benches/geo_lib.rs" 38 | harness = false 39 | 40 | [[bench]] 41 | name = "geos_lib" 42 | path = "benches/geos_lib.rs" 43 | 
harness = false 44 | required-features = ["geos"] 45 | 46 | [[bench]] 47 | name = "geoarrow" 48 | path = "benches/geoarrow.rs" 49 | harness = false 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 SystemX Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datafusion-geo 2 | ![License](https://img.shields.io/badge/license-MIT-blue.svg) 3 | [![Crates.io](https://img.shields.io/crates/v/datafusion-geo.svg)](https://crates.io/crates/datafusion-geo) 4 | 5 | Add geo functionality extension to datafusion query engine. 6 | 7 | 8 | **Goals** 9 | 1. Support multiple wkb dialects 10 | 2. 
Provide DataFusion user-defined functions similar to PostGIS 11 | 12 | P.S. Please see each function's unit tests to learn how to use it. 13 | 14 | ## Useful Links 15 | 1. EWKB format: https://github.com/postgis/postgis/blob/master/doc/ZMSgeoms.txt 16 | 2. PostGIS functions: https://postgis.net/docs/manual-dev/PostGIS_Special_Functions_Index.html -------------------------------------------------------------------------------- /benches/geo_lib.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use datafusion::prelude::SessionContext; 3 | use datafusion_expr::ScalarUDF; 4 | use datafusion_geo::function::{GeomFromTextUdf, IntersectsUdf}; 5 | 6 | mod util; 7 | 8 | async fn geo_computation(ctx: SessionContext, sql: &str) { 9 | #[cfg(feature = "geos")] 10 | { 11 | panic!("geo bench needs disabling geos feature flag") 12 | } 13 | let df = ctx.sql(sql).await.unwrap(); 14 | let _ = df.collect().await.unwrap(); 15 | } 16 | 17 | fn criterion_benchmark(c: &mut Criterion) { 18 | let rt = util::create_tokio_runtime(); 19 | let ctx = util::create_session_with_data(); 20 | ctx.register_udf(ScalarUDF::from(IntersectsUdf::new())); 21 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 22 | let sql = "select ST_Intersects(geom, ST_GeomFromText('POINT(10 11)')) from geom_table"; 23 | c.bench_function(&format!("geo_bench with sql: {}", sql), |b| { 24 | b.to_async(&rt).iter(|| geo_computation(ctx.clone(), sql)) 25 | }); 26 | } 27 | 28 | criterion_group!(benches, criterion_benchmark); 29 | criterion_main!(benches); 30 | -------------------------------------------------------------------------------- /benches/geoarrow.rs: -------------------------------------------------------------------------------- 1 | #![allow(deprecated)] 2 | 3 | use arrow_array::cast::downcast_array; 4 | use arrow_array::{Array, ArrayRef, BinaryArray, BooleanArray, Int64Array, StringArray}; 5 | use 
arrow_schema::DataType; 6 | use criterion::{criterion_group, criterion_main, Criterion}; 7 | use datafusion::physical_expr::functions::make_scalar_function; 8 | use datafusion::prelude::SessionContext; 9 | use datafusion_common::{exec_err, DataFusionError}; 10 | use datafusion_expr::{ 11 | create_udf, ReturnTypeFunction, ScalarUDF, Signature, TypeSignature, Volatility, 12 | }; 13 | use datafusion_geo::DFResult; 14 | use geo::Intersects; 15 | use geoarrow::array::WKBArray; 16 | use geoarrow::geo_traits::GeometryTrait; 17 | use geoarrow::trait_::GeometryArrayAccessor; 18 | use geoarrow::GeometryArrayTrait; 19 | use geozero::wkt::WktStr; 20 | use geozero::{CoordDimensions, ToWkb}; 21 | use std::sync::Arc; 22 | 23 | mod util; 24 | 25 | async fn geoarrow_intersects(ctx: SessionContext, sql: &str) { 26 | let df = ctx.sql(sql).await.unwrap(); 27 | let _ = df.collect().await.unwrap(); 28 | } 29 | 30 | fn criterion_benchmark(c: &mut Criterion) { 31 | let rt = util::create_tokio_runtime(); 32 | let ctx = util::create_session_with_data(); 33 | ctx.register_udf(geom_from_text()); 34 | ctx.register_udf(intersects()); 35 | 36 | let sql = "select ST_Intersects(geom, ST_GeomFromText('POINT(10 11)')) from geoarrow_table"; 37 | c.bench_function(&format!("geoarrow_bench with sql: {}", sql), |b| { 38 | b.to_async(&rt) 39 | .iter(|| geoarrow_intersects(ctx.clone(), sql)) 40 | }); 41 | } 42 | 43 | criterion_group!(benches, criterion_benchmark); 44 | criterion_main!(benches); 45 | 46 | pub fn geom_from_text() -> ScalarUDF { 47 | let st_geomfromtext = make_scalar_function(st_geomfromtext); 48 | 49 | let signature = Signature::one_of( 50 | vec![ 51 | TypeSignature::Exact(vec![DataType::Utf8]), 52 | TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64]), 53 | ], 54 | Volatility::Immutable, 55 | ); 56 | let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(Arc::new(DataType::Binary))); 57 | 58 | ScalarUDF::new( 59 | "st_geomfromtext", 60 | &signature, 61 | &return_type, 62 | 
&st_geomfromtext, 63 | ) 64 | } 65 | 66 | fn st_geomfromtext(args: &[ArrayRef]) -> DFResult> { 67 | let wkt_arr = args[0].as_ref(); 68 | let wkt_arr = downcast_array::(wkt_arr); 69 | 70 | if args.len() == 2 { 71 | let srid_arr = args[1].as_ref(); 72 | let srid_arr = downcast_array::(srid_arr); 73 | 74 | let wkb_array: WKBArray<_> = wkt_arr 75 | .iter() 76 | .zip(srid_arr.iter()) 77 | .map(|(wkt_opt, srid_opt)| { 78 | wkt_opt.and_then(|str| { 79 | let wkt = WktStr(str); 80 | wkt.to_ewkb(CoordDimensions::xy(), srid_opt.map(|a| a as i32)) 81 | .ok() 82 | }) 83 | }) 84 | .collect::() 85 | .into(); 86 | 87 | Ok(wkb_array.into_array_ref()) 88 | } else { 89 | let wkb_array: WKBArray<_> = wkt_arr 90 | .iter() 91 | .map(|opt| { 92 | opt.and_then(|str| { 93 | let wkt = WktStr(str); 94 | wkt.to_wkb(CoordDimensions::xy()).ok() 95 | }) 96 | }) 97 | .collect::() 98 | .into(); 99 | 100 | Ok(wkb_array.into_array_ref()) 101 | } 102 | } 103 | 104 | pub fn intersects() -> ScalarUDF { 105 | let intersects = |args: &[ArrayRef]| -> datafusion::error::Result> { 106 | if args.len() != 2 { 107 | return exec_err!("st_intersects must have only three args."); 108 | } 109 | let Ok(wkb_array_a) = WKBArray::::try_from(&args[0] as &dyn Array) else { 110 | return exec_err!("st_intersects input 0 can not convert to WKBArray."); 111 | }; 112 | 113 | let Ok(wkb_array_b) = WKBArray::::try_from(&args[1] as &dyn Array) else { 114 | return exec_err!("st_intersects input 1 can not convert to WKBArray."); 115 | }; 116 | let result: BooleanArray = wkb_array_a 117 | .iter_geo() 118 | .zip(wkb_array_b.iter_geo()) 119 | .map(|opt| match opt { 120 | (Some(geom_a), Some(geom_b)) => Some(geom_a.intersects(&geom_b)), 121 | _ => None, 122 | }) 123 | .collect(); 124 | Ok(Arc::new(result)) 125 | }; 126 | 127 | let translate = make_scalar_function(intersects); 128 | 129 | create_udf( 130 | "st_intersects", 131 | vec![DataType::Binary, DataType::Binary], 132 | Arc::new(DataType::Boolean), 133 | Volatility::Immutable, 
134 | translate, 135 | ) 136 | } 137 | -------------------------------------------------------------------------------- /benches/geos_lib.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use datafusion::prelude::SessionContext; 3 | use datafusion_expr::ScalarUDF; 4 | use datafusion_geo::function::{GeomFromTextUdf, IntersectsUdf}; 5 | 6 | mod util; 7 | 8 | async fn geos_computation(ctx: SessionContext, sql: &str) { 9 | #[cfg(not(feature = "geos"))] 10 | { 11 | panic!("geos bench needs enabling geos feature flag") 12 | } 13 | let df = ctx.sql(sql).await.unwrap(); 14 | let _ = df.collect().await.unwrap(); 15 | } 16 | 17 | fn criterion_benchmark(c: &mut Criterion) { 18 | let rt = util::create_tokio_runtime(); 19 | let ctx = util::create_session_with_data(); 20 | ctx.register_udf(ScalarUDF::from(IntersectsUdf::new())); 21 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 22 | let sql = "select ST_Intersects(geom, ST_GeomFromText('POINT(10 11)')) from geom_table"; 23 | c.bench_function(&format!("geos_bench with sql: {}", sql), |b| { 24 | b.to_async(&rt).iter(|| geos_computation(ctx.clone(), sql)) 25 | }); 26 | } 27 | 28 | criterion_group!(benches, criterion_benchmark); 29 | criterion_main!(benches); 30 | -------------------------------------------------------------------------------- /benches/util.rs: -------------------------------------------------------------------------------- 1 | use arrow_array::RecordBatch; 2 | use arrow_schema::{DataType, Field, Schema}; 3 | use datafusion::datasource::MemTable; 4 | use datafusion::prelude::SessionContext; 5 | use datafusion_geo::geo::GeometryArrayBuilder; 6 | use geo::line_string; 7 | use geoarrow::array::WKBArray; 8 | use geoarrow::trait_::IntoArrow; 9 | use std::sync::Arc; 10 | use tokio::runtime::Runtime; 11 | 12 | pub fn create_tokio_runtime() -> Runtime { 13 | tokio::runtime::Builder::new_multi_thread() 14 | 
.worker_threads(8) 15 | .enable_all() 16 | .build() 17 | .unwrap() 18 | } 19 | 20 | pub fn create_session_with_data() -> SessionContext { 21 | let schema = Arc::new(Schema::new(vec![Field::new( 22 | "geom", 23 | DataType::Binary, 24 | true, 25 | )])); 26 | 27 | let mut linestring_vec = vec![]; 28 | for i in 0..1000000 { 29 | let i = i as f64; 30 | let linestring = line_string![ 31 | (x: i, y: i + 1.0), 32 | (x: i + 2.0, y: i + 3.0), 33 | (x: i + 4.0, y: i + 5.0), 34 | ]; 35 | linestring_vec.push(Some(geo::Geometry::LineString(linestring))); 36 | } 37 | 38 | let builder: GeometryArrayBuilder = linestring_vec.as_slice().into(); 39 | let record = RecordBatch::try_new(schema.clone(), vec![Arc::new(builder.build())]).unwrap(); 40 | 41 | let wkb_arr: WKBArray = linestring_vec.as_slice().try_into().unwrap(); 42 | let geoarrow_record = 43 | RecordBatch::try_new(schema.clone(), vec![Arc::new(wkb_arr.into_arrow())]).unwrap(); 44 | 45 | let mem_table = MemTable::try_new( 46 | schema.clone(), 47 | vec![ 48 | vec![record.clone()], 49 | vec![record.clone()], 50 | vec![record.clone()], 51 | ], 52 | ) 53 | .unwrap(); 54 | let geoarrow_mem_table = MemTable::try_new( 55 | schema.clone(), 56 | vec![ 57 | vec![geoarrow_record.clone()], 58 | vec![geoarrow_record.clone()], 59 | vec![geoarrow_record.clone()], 60 | ], 61 | ) 62 | .unwrap(); 63 | 64 | let ctx = SessionContext::new(); 65 | ctx.register_table("geom_table", Arc::new(mem_table)) 66 | .unwrap(); 67 | ctx.register_table("geoarrow_table", Arc::new(geoarrow_mem_table)) 68 | .unwrap(); 69 | ctx 70 | } 71 | -------------------------------------------------------------------------------- /scripts/setup/README.md: -------------------------------------------------------------------------------- 1 | # Build support 2 | 3 | ``` 4 | cargo install pixi 5 | pixi install 6 | ``` 7 | 8 | ```bash 9 | # export GDAL_HOME="$(pwd)/.pixi/envs/default" 10 | export LD_LIBRARY_PATH="$(pwd)/.pixi/envs/default/lib:$LD_LIBRARY_PATH" 11 | export 
GEOS_LIB_DIR="$(pwd)/.pixi/envs/default/lib:$GEOS_LIB_DIR" 12 | export GEOS_VERSION=3.12.1 13 | export PKG_CONFIG_PATH="$(pwd)/.pixi/envs/default/lib/pkgconfig:$PKG_CONFIG_PATH" 14 | ``` -------------------------------------------------------------------------------- /scripts/setup/pixi.lock: -------------------------------------------------------------------------------- 1 | version: 4 2 | environments: 3 | default: 4 | channels: 5 | - url: https://conda.anaconda.org/conda-forge/ 6 | packages: 7 | linux-64: 8 | - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 9 | - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 10 | - conda: https://conda.anaconda.org/conda-forge/linux-64/geos-3.12.1-h59595ed_0.conda 11 | - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda 12 | - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda 13 | - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_5.conda 14 | osx-arm64: 15 | - conda: https://conda.anaconda.org/conda-forge/osx-arm64/geos-3.12.1-h965bd2d_0.conda 16 | - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-16.0.6-h4653b0c_0.conda 17 | packages: 18 | - kind: conda 19 | name: _libgcc_mutex 20 | version: '0.1' 21 | build: conda_forge 22 | subdir: linux-64 23 | url: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 24 | sha256: fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726 25 | md5: d7c89558ba9fa0495403155b64376d81 26 | license: None 27 | size: 2562 28 | timestamp: 1578324546067 29 | - kind: conda 30 | name: _openmp_mutex 31 | version: '4.5' 32 | build: 2_gnu 33 | build_number: 16 34 | subdir: linux-64 35 | url: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 36 | sha256: fbe2c5e56a653bebb982eda4876a9178aedfc2b545f25d0ce9c4c0b508253d22 37 | md5: 
73aaf86a425cc6e73fcf236a5a46396d 38 | depends: 39 | - _libgcc_mutex 0.1 conda_forge 40 | - libgomp >=7.5.0 41 | constrains: 42 | - openmp_impl 9999 43 | license: BSD-3-Clause 44 | license_family: BSD 45 | size: 23621 46 | timestamp: 1650670423406 47 | - kind: conda 48 | name: geos 49 | version: 3.12.1 50 | build: h59595ed_0 51 | subdir: linux-64 52 | url: https://conda.anaconda.org/conda-forge/linux-64/geos-3.12.1-h59595ed_0.conda 53 | sha256: 2593b255cb9c4639d6ea261c47aaed1380216a366546f0468e95c36c2afd1c1a 54 | md5: 8c0f4f71f5a59ceb0c6fa9f51501066d 55 | depends: 56 | - libgcc-ng >=12 57 | - libstdcxx-ng >=12 58 | license: LGPL-2.1-only 59 | size: 1736070 60 | timestamp: 1699778102442 61 | - kind: conda 62 | name: geos 63 | version: 3.12.1 64 | build: h965bd2d_0 65 | subdir: osx-arm64 66 | url: https://conda.anaconda.org/conda-forge/osx-arm64/geos-3.12.1-h965bd2d_0.conda 67 | sha256: 9cabd90e43caf8fe63a80909775f1ac76814f0666bf6fe7ba836d077a6d4dcf3 68 | md5: 0f28efe509ee998b3a09e571191d406a 69 | depends: 70 | - __osx >=10.9 71 | - libcxx >=16.0.6 72 | license: LGPL-2.1-only 73 | size: 1376991 74 | timestamp: 1699778806863 75 | - kind: conda 76 | name: libcxx 77 | version: 16.0.6 78 | build: h4653b0c_0 79 | subdir: osx-arm64 80 | url: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-16.0.6-h4653b0c_0.conda 81 | sha256: 11d3fb51c14832d9e4f6d84080a375dec21ea8a3a381a1910e67ff9cedc20355 82 | md5: 9d7d724faf0413bf1dbc5a85935700c8 83 | license: Apache-2.0 WITH LLVM-exception 84 | license_family: Apache 85 | size: 1160232 86 | timestamp: 1686896993785 87 | - kind: conda 88 | name: libgcc-ng 89 | version: 13.2.0 90 | build: h807b86a_5 91 | build_number: 5 92 | subdir: linux-64 93 | url: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda 94 | sha256: d32f78bfaac282cfe5205f46d558704ad737b8dbf71f9227788a5ca80facaba4 95 | md5: d4ff227c46917d3b4565302a2bbb276b 96 | depends: 97 | - _libgcc_mutex 0.1 conda_forge 98 | - _openmp_mutex >=4.5 99 | 
constrains: 100 | - libgomp 13.2.0 h807b86a_5 101 | license: GPL-3.0-only WITH GCC-exception-3.1 102 | license_family: GPL 103 | size: 770506 104 | timestamp: 1706819192021 105 | - kind: conda 106 | name: libgomp 107 | version: 13.2.0 108 | build: h807b86a_5 109 | build_number: 5 110 | subdir: linux-64 111 | url: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda 112 | sha256: 0d3d4b1b0134283ea02d58e8eb5accf3655464cf7159abf098cc694002f8d34e 113 | md5: d211c42b9ce49aee3734fdc828731689 114 | depends: 115 | - _libgcc_mutex 0.1 conda_forge 116 | license: GPL-3.0-only WITH GCC-exception-3.1 117 | license_family: GPL 118 | size: 419751 119 | timestamp: 1706819107383 120 | - kind: conda 121 | name: libstdcxx-ng 122 | version: 13.2.0 123 | build: h7e041cc_5 124 | build_number: 5 125 | subdir: linux-64 126 | url: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_5.conda 127 | sha256: a56c5b11f1e73a86e120e6141a42d9e935a99a2098491ac9e15347a1476ce777 128 | md5: f6f6600d18a4047b54f803cf708b868a 129 | license: GPL-3.0-only WITH GCC-exception-3.1 130 | license_family: GPL 131 | size: 3834139 132 | timestamp: 1706819252496 133 | -------------------------------------------------------------------------------- /scripts/setup/pixi.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "rust-build" 3 | version = "0.1.0" 4 | channels = ["conda-forge"] 5 | platforms = ["osx-arm64", "linux-64"] 6 | 7 | [tasks] 8 | 9 | [dependencies] 10 | # libgdal = ">=3.8" 11 | geos = ">=3.12.1" 12 | # protobuf = "4.23.4.*" 13 | # proj = ">=9.3.1" -------------------------------------------------------------------------------- /src/function/as_ewkt.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{GenericBinaryArray, LargeStringArray, 
OffsetSizeTrait, StringArray}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 8 | use geozero::{GeozeroGeometry, ToWkt}; 9 | use std::any::Any; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub struct AsEwktUdf { 14 | signature: Signature, 15 | aliases: Vec, 16 | } 17 | 18 | impl AsEwktUdf { 19 | pub fn new() -> Self { 20 | Self { 21 | signature: Signature::one_of( 22 | vec![ 23 | TypeSignature::Exact(vec![DataType::Binary]), 24 | TypeSignature::Exact(vec![DataType::LargeBinary]), 25 | ], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_asewkt".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for AsEwktUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_AsEWKT" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { 47 | match arg_types[0] { 48 | DataType::Binary => Ok(DataType::Utf8), 49 | DataType::LargeBinary => Ok(DataType::LargeUtf8), 50 | _ => unreachable!(), 51 | } 52 | } 53 | 54 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 55 | let arr = args[0].clone().into_array(1)?; 56 | match args[0].data_type() { 57 | DataType::Binary => { 58 | let wkb_arr = arr.as_binary::(); 59 | 60 | let mut wkt_vec = vec![]; 61 | for i in 0..wkb_arr.geom_len() { 62 | wkt_vec.push(to_ewkt::(wkb_arr, i)?); 63 | } 64 | 65 | Ok(ColumnarValue::Array(Arc::new(StringArray::from(wkt_vec)))) 66 | } 67 | DataType::LargeBinary => { 68 | let wkb_arr = arr.as_binary::(); 69 | 70 | let mut wkt_vec = vec![]; 71 | for i in 0..wkb_arr.geom_len() { 72 | wkt_vec.push(to_ewkt::(wkb_arr, i)?); 73 | } 74 | 75 | Ok(ColumnarValue::Array(Arc::new(LargeStringArray::from( 76 | wkt_vec, 77 | )))) 78 | } 79 | _ => unreachable!(), 80 | } 
81 | } 82 | 83 | fn aliases(&self) -> &[String] { 84 | &self.aliases 85 | } 86 | } 87 | 88 | fn to_ewkt( 89 | wkb_arr: &GenericBinaryArray, 90 | geom_index: usize, 91 | ) -> DFResult> { 92 | let geom = wkb_arr.geos_value(geom_index)?; 93 | let ewkt = match geom { 94 | Some(geom) => Some( 95 | geom.to_ewkt(geom.srid()) 96 | .map_err(|_| internal_datafusion_err!("Failed to convert geometry to ewkt"))?, 97 | ), 98 | None => None, 99 | }; 100 | Ok(ewkt) 101 | } 102 | 103 | impl Default for AsEwktUdf { 104 | fn default() -> Self { 105 | Self::new() 106 | } 107 | } 108 | 109 | #[cfg(test)] 110 | mod tests { 111 | use crate::function::{AsEwktUdf, GeomFromTextUdf}; 112 | use arrow::util::pretty::pretty_format_batches; 113 | use datafusion::logical_expr::ScalarUDF; 114 | use datafusion::prelude::SessionContext; 115 | 116 | #[tokio::test] 117 | async fn as_ewkt() { 118 | let ctx = SessionContext::new(); 119 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 120 | ctx.register_udf(ScalarUDF::from(AsEwktUdf::new())); 121 | let df = ctx 122 | .sql("select ST_AsEWKT(ST_GeomFromText('POINT(-71.064544 42.28787)', 4269))") 123 | .await 124 | .unwrap(); 125 | assert_eq!( 126 | pretty_format_batches(&df.collect().await.unwrap()) 127 | .unwrap() 128 | .to_string(), 129 | "+----------------------------------------------------------------------------+ 130 | | ST_AsEWKT(ST_GeomFromText(Utf8(\"POINT(-71.064544 42.28787)\"),Int64(4269))) | 131 | +----------------------------------------------------------------------------+ 132 | | SRID=4269;POINT(-71.064544 42.28787) | 133 | +----------------------------------------------------------------------------+" 134 | ); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/function/as_geojson.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use 
arrow_array::{GenericBinaryArray, LargeStringArray, OffsetSizeTrait, StringArray}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 8 | use geozero::ToJson; 9 | use std::any::Any; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub struct AsGeoJsonUdf { 14 | signature: Signature, 15 | aliases: Vec, 16 | } 17 | 18 | impl AsGeoJsonUdf { 19 | pub fn new() -> Self { 20 | Self { 21 | signature: Signature::one_of( 22 | vec![ 23 | TypeSignature::Exact(vec![DataType::Binary]), 24 | TypeSignature::Exact(vec![DataType::LargeBinary]), 25 | ], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_asgeojson".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for AsGeoJsonUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_AsGeoJSON" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { 47 | match arg_types[0] { 48 | DataType::Binary => Ok(DataType::Utf8), 49 | DataType::LargeBinary => Ok(DataType::LargeUtf8), 50 | _ => unreachable!(), 51 | } 52 | } 53 | 54 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 55 | let arr = args[0].clone().into_array(1)?; 56 | match args[0].data_type() { 57 | DataType::Binary => { 58 | let wkb_arr = arr.as_binary::(); 59 | 60 | let mut json_vec = vec![]; 61 | for i in 0..wkb_arr.geom_len() { 62 | json_vec.push(to_geojson::(wkb_arr, i)?); 63 | } 64 | 65 | Ok(ColumnarValue::Array(Arc::new(StringArray::from(json_vec)))) 66 | } 67 | DataType::LargeBinary => { 68 | let wkb_arr = arr.as_binary::(); 69 | 70 | let mut json_vec = vec![]; 71 | for i in 0..wkb_arr.geom_len() { 72 | json_vec.push(to_geojson::(wkb_arr, i)?); 73 | } 74 | 75 | Ok(ColumnarValue::Array(Arc::new(LargeStringArray::from( 76 | 
json_vec, 77 | )))) 78 | } 79 | _ => unreachable!(), 80 | } 81 | } 82 | 83 | fn aliases(&self) -> &[String] { 84 | &self.aliases 85 | } 86 | } 87 | 88 | fn to_geojson( 89 | wkb_arr: &GenericBinaryArray, 90 | geom_index: usize, 91 | ) -> DFResult> { 92 | let geom = { 93 | #[cfg(feature = "geos")] 94 | { 95 | wkb_arr.geos_value(geom_index)? 96 | } 97 | #[cfg(not(feature = "geos"))] 98 | { 99 | wkb_arr.geo_value(geom_index)? 100 | } 101 | }; 102 | let json = match geom { 103 | Some(geom) => Some( 104 | geom.to_json() 105 | .map_err(|_| internal_datafusion_err!("Failed to convert geometry to geo json"))?, 106 | ), 107 | None => None, 108 | }; 109 | Ok(json) 110 | } 111 | 112 | impl Default for AsGeoJsonUdf { 113 | fn default() -> Self { 114 | Self::new() 115 | } 116 | } 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | use crate::function::{AsGeoJsonUdf, GeomFromTextUdf}; 121 | use arrow::util::pretty::pretty_format_batches; 122 | use datafusion::logical_expr::ScalarUDF; 123 | use datafusion::prelude::SessionContext; 124 | 125 | #[tokio::test] 126 | async fn as_geojson() { 127 | let ctx = SessionContext::new(); 128 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 129 | ctx.register_udf(ScalarUDF::from(AsGeoJsonUdf::new())); 130 | let df = ctx 131 | .sql("select ST_AsGeoJSON(ST_GeomFromText('POINT(-71.064544 42.28787)'))") 132 | .await 133 | .unwrap(); 134 | assert_eq!( 135 | pretty_format_batches(&df.collect().await.unwrap()) 136 | .unwrap() 137 | .to_string(), 138 | "+-------------------------------------------------------------------+ 139 | | ST_AsGeoJSON(ST_GeomFromText(Utf8(\"POINT(-71.064544 42.28787)\"))) | 140 | +-------------------------------------------------------------------+ 141 | | {\"type\": \"Point\", \"coordinates\": [-71.064544,42.28787]} | 142 | +-------------------------------------------------------------------+" 143 | ); 144 | } 145 | } 146 | -------------------------------------------------------------------------------- 
/src/function/as_mvt_geom.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::{Box2d, GeometryArray, GeometryArrayBuilder}; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{Array, GenericBinaryArray, OffsetSizeTrait, StructArray}; 5 | use arrow_schema::DataType; 6 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 7 | use geo::{AffineOps, AffineTransform}; 8 | use geozero::wkb::WkbDialect; 9 | use std::any::Any; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub struct AsMVTGeomUdf { 14 | signature: Signature, 15 | aliases: Vec, 16 | } 17 | 18 | impl AsMVTGeomUdf { 19 | pub fn new() -> Self { 20 | Self { 21 | signature: Signature::one_of( 22 | vec![ 23 | TypeSignature::Exact(vec![DataType::Binary, Box2d::data_type()]), 24 | TypeSignature::Exact(vec![DataType::LargeBinary, Box2d::data_type()]), 25 | ], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_asmvtgeom".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for AsMVTGeomUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_AsMVTGeom" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { 47 | Ok(arg_types[0].clone()) 48 | } 49 | 50 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 51 | let arr = args[0].clone().into_array(1)?; 52 | let arr1 = args[1].clone().into_array(1)?; 53 | let box_arr = arr1.as_struct(); 54 | match args[0].data_type() { 55 | DataType::Binary => { 56 | let wkb_arr = arr.as_binary::(); 57 | Ok(ColumnarValue::Array(Arc::new(as_mvt_geom( 58 | wkb_arr, box_arr, 59 | )?))) 60 | } 61 | DataType::LargeBinary => { 62 | let wkb_arr = arr.as_binary::(); 63 | Ok(ColumnarValue::Array(Arc::new(as_mvt_geom( 64 | wkb_arr, box_arr, 65 | )?))) 66 | } 67 | _ => 
unreachable!(), 68 | } 69 | } 70 | 71 | fn aliases(&self) -> &[String] { 72 | &self.aliases 73 | } 74 | } 75 | 76 | fn as_mvt_geom( 77 | wkb_arr: &GenericBinaryArray, 78 | box_arr: &StructArray, 79 | ) -> DFResult> { 80 | let mut builder = GeometryArrayBuilder::::new(WkbDialect::Ewkb, wkb_arr.len()); 81 | for i in 0..wkb_arr.geom_len() { 82 | let geom = wkb_arr.geo_value(i)?; 83 | let box2d = Box2d::value(box_arr, i)?.unwrap(); 84 | 85 | match geom { 86 | Some(geom) => { 87 | let width = box2d.xmax - box2d.xmin; 88 | let height = box2d.ymax - box2d.ymin; 89 | let fx = 4096. / width; 90 | let fy = -4096. / height; 91 | 92 | let transform = 93 | AffineTransform::new(fx, 0.0, -box2d.xmin * fx, 0.0, fy, -box2d.ymax * fy); 94 | 95 | let geom = geom.affine_transform(&transform); 96 | builder.append_geo_geometry(&Some(geom))?; 97 | } 98 | None => builder.append_null(), 99 | } 100 | } 101 | Ok(builder.build()) 102 | } 103 | 104 | impl Default for AsMVTGeomUdf { 105 | fn default() -> Self { 106 | Self::new() 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use crate::function::as_mvt_geom::AsMVTGeomUdf; 113 | use crate::function::box2d::Box2dUdf; 114 | use crate::function::{AsTextUdf, GeomFromTextUdf}; 115 | use arrow::util::pretty::pretty_format_batches; 116 | use datafusion::logical_expr::ScalarUDF; 117 | use datafusion::prelude::SessionContext; 118 | 119 | #[tokio::test] 120 | async fn as_mvt_geom() { 121 | let ctx = SessionContext::new(); 122 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 123 | ctx.register_udf(ScalarUDF::from(AsTextUdf::new())); 124 | ctx.register_udf(ScalarUDF::from(AsMVTGeomUdf::new())); 125 | ctx.register_udf(ScalarUDF::from(Box2dUdf::new())); 126 | let df = ctx 127 | .sql("select ST_AsText(ST_AsMVTGeom(ST_GeomFromText('POLYGON ((0 0, 10 0, 10 5, 0 -5, 0 0))'), Box2D(ST_GeomFromText('LINESTRING(0 0, 4096 4096)'))))") 128 | .await 129 | .unwrap(); 130 | assert_eq!( 131 | 
pretty_format_batches(&df.collect().await.unwrap()) 132 | .unwrap() 133 | .to_string(), 134 | "+-----------------------------------------------------------------------------------------------------------------------------------------------------+ 135 | | ST_AsText(ST_AsMVTGeom(ST_GeomFromText(Utf8(\"POLYGON ((0 0, 10 0, 10 5, 0 -5, 0 0))\")),Box2D(ST_GeomFromText(Utf8(\"LINESTRING(0 0, 4096 4096)\"))))) | 136 | +-----------------------------------------------------------------------------------------------------------------------------------------------------+ 137 | | POLYGON((0 4096,10 4096,10 4091,0 4101,0 4096)) | 138 | +-----------------------------------------------------------------------------------------------------------------------------------------------------+" 139 | ); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/function/as_text.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{GenericBinaryArray, LargeStringArray, OffsetSizeTrait, StringArray}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 8 | use geozero::ToWkt; 9 | use std::any::Any; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub struct AsTextUdf { 14 | signature: Signature, 15 | aliases: Vec, 16 | } 17 | 18 | impl AsTextUdf { 19 | pub fn new() -> Self { 20 | Self { 21 | signature: Signature::one_of( 22 | vec![ 23 | TypeSignature::Exact(vec![DataType::Binary]), 24 | TypeSignature::Exact(vec![DataType::LargeBinary]), 25 | ], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_astext".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for AsTextUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 
| 38 | fn name(&self) -> &str { 39 | "ST_AsText" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { 47 | match arg_types[0] { 48 | DataType::Binary => Ok(DataType::Utf8), 49 | DataType::LargeBinary => Ok(DataType::LargeUtf8), 50 | _ => unreachable!(), 51 | } 52 | } 53 | 54 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 55 | let arr = args[0].clone().into_array(1)?; 56 | match args[0].data_type() { 57 | DataType::Binary => { 58 | let wkb_arr = arr.as_binary::(); 59 | 60 | let mut wkt_vec = vec![]; 61 | for i in 0..wkb_arr.geom_len() { 62 | wkt_vec.push(to_wkt::(wkb_arr, i)?); 63 | } 64 | 65 | Ok(ColumnarValue::Array(Arc::new(StringArray::from(wkt_vec)))) 66 | } 67 | DataType::LargeBinary => { 68 | let wkb_arr = arr.as_binary::(); 69 | 70 | let mut wkt_vec = vec![]; 71 | for i in 0..wkb_arr.geom_len() { 72 | wkt_vec.push(to_wkt::(wkb_arr, i)?); 73 | } 74 | 75 | Ok(ColumnarValue::Array(Arc::new(LargeStringArray::from( 76 | wkt_vec, 77 | )))) 78 | } 79 | _ => unreachable!(), 80 | } 81 | } 82 | 83 | fn aliases(&self) -> &[String] { 84 | &self.aliases 85 | } 86 | } 87 | 88 | fn to_wkt( 89 | wkb_arr: &GenericBinaryArray, 90 | geom_index: usize, 91 | ) -> DFResult> { 92 | let geom = { 93 | #[cfg(feature = "geos")] 94 | { 95 | wkb_arr.geos_value(geom_index)? 96 | } 97 | #[cfg(not(feature = "geos"))] 98 | { 99 | wkb_arr.geo_value(geom_index)? 
100 | } 101 | }; 102 | let wkt = match geom { 103 | Some(geom) => Some( 104 | geom.to_wkt() 105 | .map_err(|_| internal_datafusion_err!("Failed to convert geometry to wkt"))?, 106 | ), 107 | None => None, 108 | }; 109 | Ok(wkt) 110 | } 111 | 112 | impl Default for AsTextUdf { 113 | fn default() -> Self { 114 | Self::new() 115 | } 116 | } 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | use crate::function::{AsTextUdf, GeomFromTextUdf}; 121 | use arrow::util::pretty::pretty_format_batches; 122 | use datafusion::logical_expr::ScalarUDF; 123 | use datafusion::prelude::SessionContext; 124 | 125 | #[tokio::test] 126 | async fn as_text() { 127 | let ctx = SessionContext::new(); 128 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 129 | ctx.register_udf(ScalarUDF::from(AsTextUdf::new())); 130 | let df = ctx 131 | .sql("select ST_AsText(ST_GeomFromText('POINT(-71.064544 42.28787)'))") 132 | .await 133 | .unwrap(); 134 | assert_eq!( 135 | pretty_format_batches(&df.collect().await.unwrap()) 136 | .unwrap() 137 | .to_string(), 138 | "+----------------------------------------------------------------+ 139 | | ST_AsText(ST_GeomFromText(Utf8(\"POINT(-71.064544 42.28787)\"))) | 140 | +----------------------------------------------------------------+ 141 | | POINT(-71.064544 42.28787) | 142 | +----------------------------------------------------------------+" 143 | ); 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/function/boundary.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::{GeometryArray, GeometryArrayBuilder}; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{GenericBinaryArray, OffsetSizeTrait}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 8 | use geos::Geom; 9 | 
use geozero::wkb::WkbDialect; 10 | use std::any::Any; 11 | use std::sync::Arc; 12 | 13 | #[derive(Debug)] 14 | pub struct BoundaryUdf { 15 | signature: Signature, 16 | aliases: Vec, 17 | } 18 | 19 | impl BoundaryUdf { 20 | pub fn new() -> Self { 21 | Self { 22 | signature: Signature::one_of( 23 | vec![ 24 | TypeSignature::Exact(vec![DataType::Binary]), 25 | TypeSignature::Exact(vec![DataType::LargeBinary]), 26 | ], 27 | Volatility::Immutable, 28 | ), 29 | aliases: vec!["st_boundary".to_string()], 30 | } 31 | } 32 | } 33 | 34 | impl ScalarUDFImpl for BoundaryUdf { 35 | fn as_any(&self) -> &dyn Any { 36 | self 37 | } 38 | 39 | fn name(&self) -> &str { 40 | "ST_Boundary" 41 | } 42 | 43 | fn signature(&self) -> &Signature { 44 | &self.signature 45 | } 46 | 47 | fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { 48 | Ok(arg_types[0].clone()) 49 | } 50 | 51 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 52 | let arr = args[0].clone().into_array(1)?; 53 | match args[0].data_type() { 54 | DataType::Binary => { 55 | let wkb_arr = arr.as_binary::(); 56 | build_boundary_arr::(wkb_arr) 57 | } 58 | DataType::LargeBinary => { 59 | let wkb_arr = arr.as_binary::(); 60 | build_boundary_arr::(wkb_arr) 61 | } 62 | _ => unreachable!(), 63 | } 64 | } 65 | 66 | fn aliases(&self) -> &[String] { 67 | &self.aliases 68 | } 69 | } 70 | 71 | fn build_boundary_arr( 72 | wkb_arr: &GenericBinaryArray, 73 | ) -> DFResult { 74 | let mut builder = GeometryArrayBuilder::::new(WkbDialect::Ewkb, wkb_arr.geom_len()); 75 | for i in 0..wkb_arr.geom_len() { 76 | if let Some(geom) = wkb_arr.geos_value(i)? 
{ 77 | builder 78 | .append_geos_geometry(&Some(geom.boundary().map_err(|e| { 79 | internal_datafusion_err!("Failed to call boundary, e: {}", e) 80 | })?))?; 81 | } else { 82 | builder.append_null(); 83 | } 84 | } 85 | 86 | Ok(ColumnarValue::Array(Arc::new(builder.build()))) 87 | } 88 | 89 | impl Default for BoundaryUdf { 90 | fn default() -> Self { 91 | Self::new() 92 | } 93 | } 94 | 95 | #[cfg(test)] 96 | mod tests { 97 | use crate::function::{AsTextUdf, BoundaryUdf, GeomFromTextUdf}; 98 | use arrow::util::pretty::pretty_format_batches; 99 | use datafusion::logical_expr::ScalarUDF; 100 | use datafusion::prelude::SessionContext; 101 | 102 | #[tokio::test] 103 | async fn boundary() { 104 | let ctx = SessionContext::new(); 105 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 106 | ctx.register_udf(ScalarUDF::from(AsTextUdf::new())); 107 | ctx.register_udf(ScalarUDF::from(BoundaryUdf::new())); 108 | let df = ctx 109 | .sql("SELECT ST_AsText(ST_Boundary(ST_GeomFromText('POLYGON((1 1,0 0, -1 1, 1 1))')));") 110 | .await 111 | .unwrap(); 112 | assert_eq!( 113 | pretty_format_batches(&df.collect().await.unwrap()) 114 | .unwrap() 115 | .to_string(), 116 | "+--------------------------------------------------------------------------------+ 117 | | ST_AsText(ST_Boundary(ST_GeomFromText(Utf8(\"POLYGON((1 1,0 0, -1 1, 1 1))\")))) | 118 | +--------------------------------------------------------------------------------+ 119 | | LINESTRING(1 1,0 0,-1 1,1 1) | 120 | +--------------------------------------------------------------------------------+" 121 | ); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/function/box2d.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::{build_box2d_array, Box2d, GeometryArray}; 2 | use arrow_array::cast::AsArray; 3 | use arrow_array::Array; 4 | use arrow_schema::DataType; 5 | use datafusion_expr::{ColumnarValue, 
ScalarUDFImpl, Signature, Volatility}; 6 | use geo::BoundingRect; 7 | use std::any::Any; 8 | use std::sync::Arc; 9 | 10 | #[derive(Debug)] 11 | pub struct Box2dUdf { 12 | signature: Signature, 13 | aliases: Vec, 14 | } 15 | 16 | impl Box2dUdf { 17 | pub fn new() -> Self { 18 | Self { 19 | signature: Signature::uniform( 20 | 1, 21 | vec![DataType::Binary, DataType::LargeBinary], 22 | Volatility::Immutable, 23 | ), 24 | aliases: vec!["box2d".to_string()], 25 | } 26 | } 27 | } 28 | 29 | impl ScalarUDFImpl for Box2dUdf { 30 | fn as_any(&self) -> &dyn Any { 31 | self 32 | } 33 | 34 | fn name(&self) -> &str { 35 | "Box2D" 36 | } 37 | 38 | fn signature(&self) -> &Signature { 39 | &self.signature 40 | } 41 | 42 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 43 | Ok(Box2d::data_type()) 44 | } 45 | 46 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 47 | let arr = args[0].clone().into_array(1)?; 48 | match arr.data_type() { 49 | DataType::Binary => { 50 | let wkb_arr = arr.as_binary::(); 51 | let mut box2d_vec: Vec> = vec![]; 52 | for i in 0..wkb_arr.geom_len() { 53 | box2d_vec.push( 54 | wkb_arr 55 | .geo_value(i)? 56 | .and_then(|geom| geom.bounding_rect().map(Box2d::from)), 57 | ); 58 | } 59 | let arr = build_box2d_array(box2d_vec); 60 | Ok(ColumnarValue::Array(Arc::new(arr))) 61 | } 62 | DataType::LargeBinary => { 63 | let wkb_arr = arr.as_binary::(); 64 | let mut box2d_vec: Vec> = vec![]; 65 | for i in 0..wkb_arr.geom_len() { 66 | box2d_vec.push( 67 | wkb_arr 68 | .geo_value(i)? 
69 | .and_then(|geom| geom.bounding_rect().map(Box2d::from)), 70 | ); 71 | } 72 | let arr = build_box2d_array(box2d_vec); 73 | Ok(ColumnarValue::Array(Arc::new(arr))) 74 | } 75 | _ => unreachable!(), 76 | } 77 | } 78 | 79 | fn aliases(&self) -> &[String] { 80 | &self.aliases 81 | } 82 | } 83 | 84 | impl Default for Box2dUdf { 85 | fn default() -> Self { 86 | Self::new() 87 | } 88 | } 89 | 90 | #[cfg(test)] 91 | mod tests { 92 | use crate::function::box2d::Box2dUdf; 93 | use crate::function::GeomFromTextUdf; 94 | use arrow::util::pretty::pretty_format_batches; 95 | use datafusion::logical_expr::ScalarUDF; 96 | use datafusion::prelude::SessionContext; 97 | 98 | #[tokio::test] 99 | async fn box2d() { 100 | let ctx = SessionContext::new(); 101 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 102 | ctx.register_udf(ScalarUDF::from(Box2dUdf::new())); 103 | let df = ctx 104 | .sql("select Box2D(ST_GeomFromText('LINESTRING(1 2, 3 4, 5 6)'))") 105 | .await 106 | .unwrap(); 107 | assert_eq!( 108 | pretty_format_batches(&df.collect().await.unwrap()) 109 | .unwrap() 110 | .to_string(), 111 | "+-----------------------------------------------------------+ 112 | | Box2D(ST_GeomFromText(Utf8(\"LINESTRING(1 2, 3 4, 5 6)\"))) | 113 | +-----------------------------------------------------------+ 114 | | {xmin: 1.0, ymin: 2.0, xmax: 5.0, ymax: 6.0} | 115 | +-----------------------------------------------------------+" 116 | ); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/function/buffer.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::{GeometryArray, GeometryArrayBuilder}; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{GenericBinaryArray, OffsetSizeTrait}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError, ScalarValue}; 7 | use 
datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 8 | use geos::Geom; 9 | use geozero::wkb::WkbDialect; 10 | use std::any::Any; 11 | use std::sync::Arc; 12 | 13 | #[derive(Debug)] 14 | pub struct BufferUdf { 15 | signature: Signature, 16 | aliases: Vec, 17 | } 18 | 19 | impl BufferUdf { 20 | pub fn new() -> Self { 21 | Self { 22 | signature: Signature::one_of( 23 | vec![ 24 | TypeSignature::Exact(vec![ 25 | DataType::Binary, 26 | DataType::Float64, 27 | DataType::Int32, 28 | ]), 29 | TypeSignature::Exact(vec![ 30 | DataType::LargeBinary, 31 | DataType::Float64, 32 | DataType::Int32, 33 | ]), 34 | ], 35 | Volatility::Immutable, 36 | ), 37 | aliases: vec!["st_buffer".to_string()], 38 | } 39 | } 40 | } 41 | 42 | impl ScalarUDFImpl for BufferUdf { 43 | fn as_any(&self) -> &dyn Any { 44 | self 45 | } 46 | 47 | fn name(&self) -> &str { 48 | "ST_Buffer" 49 | } 50 | 51 | fn signature(&self) -> &Signature { 52 | &self.signature 53 | } 54 | 55 | fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { 56 | Ok(arg_types[0].clone()) 57 | } 58 | 59 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 60 | let arr = args[0].clone().into_array(1)?; 61 | let ColumnarValue::Scalar(ScalarValue::Float64(Some(width))) = args[1] else { 62 | return internal_err!("The second arg should be f64 scalar"); 63 | }; 64 | let ColumnarValue::Scalar(ScalarValue::Int32(Some(quadsegs))) = args[2] else { 65 | return internal_err!("The third arg should be i32 scalar"); 66 | }; 67 | 68 | match args[0].data_type() { 69 | DataType::Binary => { 70 | let wkb_arr = arr.as_binary::(); 71 | build_buffer_arr(wkb_arr, width, quadsegs) 72 | } 73 | DataType::LargeBinary => { 74 | let wkb_arr = arr.as_binary::(); 75 | build_buffer_arr(wkb_arr, width, quadsegs) 76 | } 77 | _ => unreachable!(), 78 | } 79 | } 80 | 81 | fn aliases(&self) -> &[String] { 82 | &self.aliases 83 | } 84 | } 85 | 86 | fn build_buffer_arr( 87 | wkb_arr: 
&GenericBinaryArray, 88 | width: f64, 89 | quadsegs: i32, 90 | ) -> DFResult { 91 | let mut builder = GeometryArrayBuilder::::new(WkbDialect::Ewkb, wkb_arr.geom_len()); 92 | for i in 0..wkb_arr.geom_len() { 93 | if let Some(geom) = wkb_arr.geos_value(i)? { 94 | builder.append_geos_geometry(&Some( 95 | geom.buffer(width, quadsegs) 96 | .map_err(|e| internal_datafusion_err!("Failed to call buffer, e: {}", e))?, 97 | ))?; 98 | } else { 99 | builder.append_null(); 100 | } 101 | } 102 | 103 | Ok(ColumnarValue::Array(Arc::new(builder.build()))) 104 | } 105 | 106 | impl Default for BufferUdf { 107 | fn default() -> Self { 108 | Self::new() 109 | } 110 | } 111 | 112 | #[cfg(test)] 113 | mod tests { 114 | use crate::function::{AsTextUdf, BufferUdf, GeomFromTextUdf}; 115 | use arrow::util::pretty::pretty_format_batches; 116 | use datafusion::logical_expr::ScalarUDF; 117 | use datafusion::prelude::SessionContext; 118 | 119 | #[tokio::test] 120 | async fn buffer() { 121 | let ctx = SessionContext::new(); 122 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 123 | ctx.register_udf(ScalarUDF::from(AsTextUdf::new())); 124 | ctx.register_udf(ScalarUDF::from(BufferUdf::new())); 125 | let df = ctx 126 | .sql("SELECT ST_AsText(ST_Buffer(ST_GeomFromText('POINT(100 90)'), 50.0, 2::Integer));") 127 | .await 128 | .unwrap(); 129 | assert_eq!( 130 | pretty_format_batches(&df.collect().await.unwrap()) 131 | .unwrap() 132 | .to_string(), 133 | "+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 134 | | ST_AsText(ST_Buffer(ST_GeomFromText(Utf8(\"POINT(100 90)\")),Float64(50),Int64(2))) | 135 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 136 | | POLYGON((150 
90,135.35533905932738 54.64466094067263,100 40,64.64466094067262 54.64466094067262,50 90,64.64466094067262 125.35533905932738,99.99999999999999 140,135.35533905932738 125.35533905932738,150 90)) | 137 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+" 138 | ); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/function/covered_by.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{BooleanArray, GenericBinaryArray, OffsetSizeTrait}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; 8 | use geos::Geom; 9 | use rayon::prelude::*; 10 | use std::any::Any; 11 | use std::sync::Arc; 12 | 13 | #[derive(Debug)] 14 | pub struct CoveredByUdf { 15 | signature: Signature, 16 | aliases: Vec, 17 | } 18 | 19 | impl CoveredByUdf { 20 | pub fn new() -> Self { 21 | Self { 22 | signature: Signature::uniform( 23 | 2, 24 | vec![DataType::Binary, DataType::LargeBinary], 25 | Volatility::Immutable, 26 | ), 27 | aliases: vec!["st_coveredby".to_string()], 28 | } 29 | } 30 | } 31 | 32 | impl ScalarUDFImpl for CoveredByUdf { 33 | fn as_any(&self) -> &dyn Any { 34 | self 35 | } 36 | 37 | fn name(&self) -> &str { 38 | "ST_CoveredBy" 39 | } 40 | 41 | fn signature(&self) -> &Signature { 42 | &self.signature 43 | } 44 | 45 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 46 | Ok(DataType::Boolean) 47 | } 48 | 49 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 50 | let (arr0, arr1) = match (args[0].clone(), args[1].clone()) { 51 | 
(ColumnarValue::Array(arr0), ColumnarValue::Array(arr1)) => (arr0, arr1), 52 | (ColumnarValue::Array(arr0), ColumnarValue::Scalar(scalar)) => { 53 | (arr0.clone(), scalar.to_array_of_size(arr0.len())?) 54 | } 55 | (ColumnarValue::Scalar(scalar), ColumnarValue::Array(arr1)) => { 56 | (scalar.to_array_of_size(arr1.len())?, arr1) 57 | } 58 | (ColumnarValue::Scalar(scalar0), ColumnarValue::Scalar(scalar1)) => { 59 | (scalar0.to_array_of_size(1)?, scalar1.to_array_of_size(1)?) 60 | } 61 | }; 62 | if arr0.len() != arr1.len() { 63 | return internal_err!("Two arrays length is not same"); 64 | } 65 | 66 | match (arr0.data_type(), arr1.data_type()) { 67 | (DataType::Binary, DataType::Binary) => { 68 | let arr0 = arr0.as_binary::(); 69 | let arr1 = arr1.as_binary::(); 70 | covered_by::(arr0, arr1) 71 | } 72 | (DataType::LargeBinary, DataType::Binary) => { 73 | let arr0 = arr0.as_binary::(); 74 | let arr1 = arr1.as_binary::(); 75 | covered_by::(arr0, arr1) 76 | } 77 | (DataType::Binary, DataType::LargeBinary) => { 78 | let arr0 = arr0.as_binary::(); 79 | let arr1 = arr1.as_binary::(); 80 | covered_by::(arr0, arr1) 81 | } 82 | (DataType::LargeBinary, DataType::LargeBinary) => { 83 | let arr0 = arr0.as_binary::(); 84 | let arr1 = arr1.as_binary::(); 85 | covered_by::(arr0, arr1) 86 | } 87 | _ => unreachable!(), 88 | } 89 | } 90 | 91 | fn aliases(&self) -> &[String] { 92 | &self.aliases 93 | } 94 | } 95 | 96 | impl Default for CoveredByUdf { 97 | fn default() -> Self { 98 | Self::new() 99 | } 100 | } 101 | 102 | fn covered_by( 103 | arr0: &GenericBinaryArray, 104 | arr1: &GenericBinaryArray, 105 | ) -> DFResult { 106 | let bool_vec = (0..arr0.geom_len()) 107 | .into_par_iter() 108 | .map( 109 | |geom_index| match (arr0.geos_value(geom_index)?, arr1.geos_value(geom_index)?) 
{ 110 | (Some(geom0), Some(geom1)) => { 111 | let result = geom0.covered_by(&geom1).map_err(|e| { 112 | internal_datafusion_err!("Failed to do covered_by, error: {}", e) 113 | })?; 114 | Ok(Some(result)) 115 | } 116 | _ => Ok(None), 117 | }, 118 | ) 119 | .collect::>>>()?; 120 | Ok(ColumnarValue::Array(Arc::new(BooleanArray::from(bool_vec)))) 121 | } 122 | 123 | #[cfg(test)] 124 | mod tests { 125 | use crate::function::{CoveredByUdf, GeomFromTextUdf}; 126 | use arrow::util::pretty::pretty_format_batches; 127 | use datafusion::prelude::SessionContext; 128 | use datafusion_expr::ScalarUDF; 129 | 130 | #[tokio::test] 131 | async fn covered_by() { 132 | let ctx = SessionContext::new(); 133 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 134 | ctx.register_udf(ScalarUDF::from(CoveredByUdf::new())); 135 | let df = ctx 136 | .sql("select ST_CoveredBy(ST_GeomFromText('POINT(1 1)'), ST_GeomFromText('LINESTRING ( 1 1, 0 2 )'))") 137 | .await 138 | .unwrap(); 139 | assert_eq!( 140 | pretty_format_batches(&df.collect().await.unwrap()) 141 | .unwrap() 142 | .to_string(), 143 | "+----------------------------------------------------------------------------------------------------+ 144 | | ST_CoveredBy(ST_GeomFromText(Utf8(\"POINT(1 1)\")),ST_GeomFromText(Utf8(\"LINESTRING ( 1 1, 0 2 )\"))) | 145 | +----------------------------------------------------------------------------------------------------+ 146 | | true | 147 | +----------------------------------------------------------------------------------------------------+" 148 | ); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/function/covers.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{BooleanArray, GenericBinaryArray, OffsetSizeTrait}; 5 | use arrow_schema::DataType; 6 | use 
datafusion_common::{internal_datafusion_err, internal_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; 8 | use geos::Geom; 9 | use rayon::iter::IntoParallelIterator; 10 | use rayon::prelude::*; 11 | use std::any::Any; 12 | use std::sync::Arc; 13 | 14 | #[derive(Debug)] 15 | pub struct CoversUdf { 16 | signature: Signature, 17 | aliases: Vec, 18 | } 19 | 20 | impl CoversUdf { 21 | pub fn new() -> Self { 22 | Self { 23 | signature: Signature::uniform( 24 | 2, 25 | vec![DataType::Binary, DataType::LargeBinary], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_covers".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for CoversUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_Covers" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 47 | Ok(DataType::Boolean) 48 | } 49 | 50 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 51 | let (arr0, arr1) = match (args[0].clone(), args[1].clone()) { 52 | (ColumnarValue::Array(arr0), ColumnarValue::Array(arr1)) => (arr0, arr1), 53 | (ColumnarValue::Array(arr0), ColumnarValue::Scalar(scalar)) => { 54 | (arr0.clone(), scalar.to_array_of_size(arr0.len())?) 55 | } 56 | (ColumnarValue::Scalar(scalar), ColumnarValue::Array(arr1)) => { 57 | (scalar.to_array_of_size(arr1.len())?, arr1) 58 | } 59 | (ColumnarValue::Scalar(scalar0), ColumnarValue::Scalar(scalar1)) => { 60 | (scalar0.to_array_of_size(1)?, scalar1.to_array_of_size(1)?) 
61 | } 62 | }; 63 | if arr0.len() != arr1.len() { 64 | return internal_err!("Two arrays length is not same"); 65 | } 66 | 67 | match (arr0.data_type(), arr1.data_type()) { 68 | (DataType::Binary, DataType::Binary) => { 69 | let arr0 = arr0.as_binary::(); 70 | let arr1 = arr1.as_binary::(); 71 | covers::(arr0, arr1) 72 | } 73 | (DataType::LargeBinary, DataType::Binary) => { 74 | let arr0 = arr0.as_binary::(); 75 | let arr1 = arr1.as_binary::(); 76 | covers::(arr0, arr1) 77 | } 78 | (DataType::Binary, DataType::LargeBinary) => { 79 | let arr0 = arr0.as_binary::(); 80 | let arr1 = arr1.as_binary::(); 81 | covers::(arr0, arr1) 82 | } 83 | (DataType::LargeBinary, DataType::LargeBinary) => { 84 | let arr0 = arr0.as_binary::(); 85 | let arr1 = arr1.as_binary::(); 86 | covers::(arr0, arr1) 87 | } 88 | _ => unreachable!(), 89 | } 90 | } 91 | 92 | fn aliases(&self) -> &[String] { 93 | &self.aliases 94 | } 95 | } 96 | 97 | impl Default for CoversUdf { 98 | fn default() -> Self { 99 | Self::new() 100 | } 101 | } 102 | 103 | fn covers( 104 | arr0: &GenericBinaryArray, 105 | arr1: &GenericBinaryArray, 106 | ) -> DFResult { 107 | let bool_vec = (0..arr0.geom_len()) 108 | .into_par_iter() 109 | .map( 110 | |geom_index| match (arr0.geos_value(geom_index)?, arr1.geos_value(geom_index)?) 
{ 111 | (Some(geom0), Some(geom1)) => { 112 | let result = geom0.covers(&geom1).map_err(|e| { 113 | internal_datafusion_err!("Failed to do covers, error: {}", e) 114 | })?; 115 | Ok(Some(result)) 116 | } 117 | _ => Ok(None), 118 | }, 119 | ) 120 | .collect::>>>()?; 121 | Ok(ColumnarValue::Array(Arc::new(BooleanArray::from(bool_vec)))) 122 | } 123 | 124 | #[cfg(test)] 125 | mod tests { 126 | use crate::function::{CoversUdf, GeomFromTextUdf}; 127 | use arrow::util::pretty::pretty_format_batches; 128 | use datafusion::prelude::SessionContext; 129 | use datafusion_expr::ScalarUDF; 130 | 131 | #[tokio::test] 132 | async fn covers() { 133 | let ctx = SessionContext::new(); 134 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 135 | ctx.register_udf(ScalarUDF::from(CoversUdf::new())); 136 | let df = ctx 137 | .sql("select ST_Covers(ST_GeomFromText('LINESTRING ( 1 1, 0 2 )'), ST_GeomFromText('POINT(1 1)'))") 138 | .await 139 | .unwrap(); 140 | assert_eq!( 141 | pretty_format_batches(&df.collect().await.unwrap()) 142 | .unwrap() 143 | .to_string(), 144 | "+-------------------------------------------------------------------------------------------------+ 145 | | ST_Covers(ST_GeomFromText(Utf8(\"LINESTRING ( 1 1, 0 2 )\")),ST_GeomFromText(Utf8(\"POINT(1 1)\"))) | 146 | +-------------------------------------------------------------------------------------------------+ 147 | | true | 148 | +-------------------------------------------------------------------------------------------------+" 149 | ); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/function/equals.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{BooleanArray, GenericBinaryArray, OffsetSizeTrait}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, internal_err, 
DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; 8 | use geos::Geom; 9 | use rayon::iter::IntoParallelIterator; 10 | use rayon::prelude::*; 11 | use std::any::Any; 12 | use std::sync::Arc; 13 | 14 | #[derive(Debug)] 15 | pub struct EqualsUdf { 16 | signature: Signature, 17 | aliases: Vec, 18 | } 19 | 20 | impl EqualsUdf { 21 | pub fn new() -> Self { 22 | Self { 23 | signature: Signature::uniform( 24 | 2, 25 | vec![DataType::Binary, DataType::LargeBinary], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_equals".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for EqualsUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_Equals" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 47 | Ok(DataType::Boolean) 48 | } 49 | 50 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 51 | let (arr0, arr1) = match (args[0].clone(), args[1].clone()) { 52 | (ColumnarValue::Array(arr0), ColumnarValue::Array(arr1)) => (arr0, arr1), 53 | (ColumnarValue::Array(arr0), ColumnarValue::Scalar(scalar)) => { 54 | (arr0.clone(), scalar.to_array_of_size(arr0.len())?) 55 | } 56 | (ColumnarValue::Scalar(scalar), ColumnarValue::Array(arr1)) => { 57 | (scalar.to_array_of_size(arr1.len())?, arr1) 58 | } 59 | (ColumnarValue::Scalar(scalar0), ColumnarValue::Scalar(scalar1)) => { 60 | (scalar0.to_array_of_size(1)?, scalar1.to_array_of_size(1)?) 
61 | } 62 | }; 63 | if arr0.len() != arr1.len() { 64 | return internal_err!("Two arrays length is not same"); 65 | } 66 | 67 | match (arr0.data_type(), arr1.data_type()) { 68 | (DataType::Binary, DataType::Binary) => { 69 | let arr0 = arr0.as_binary::(); 70 | let arr1 = arr1.as_binary::(); 71 | equals::(arr0, arr1) 72 | } 73 | (DataType::LargeBinary, DataType::Binary) => { 74 | let arr0 = arr0.as_binary::(); 75 | let arr1 = arr1.as_binary::(); 76 | equals::(arr0, arr1) 77 | } 78 | (DataType::Binary, DataType::LargeBinary) => { 79 | let arr0 = arr0.as_binary::(); 80 | let arr1 = arr1.as_binary::(); 81 | equals::(arr0, arr1) 82 | } 83 | (DataType::LargeBinary, DataType::LargeBinary) => { 84 | let arr0 = arr0.as_binary::(); 85 | let arr1 = arr1.as_binary::(); 86 | equals::(arr0, arr1) 87 | } 88 | _ => unreachable!(), 89 | } 90 | } 91 | 92 | fn aliases(&self) -> &[String] { 93 | &self.aliases 94 | } 95 | } 96 | 97 | impl Default for EqualsUdf { 98 | fn default() -> Self { 99 | Self::new() 100 | } 101 | } 102 | 103 | fn equals( 104 | arr0: &GenericBinaryArray, 105 | arr1: &GenericBinaryArray, 106 | ) -> DFResult { 107 | let bool_vec = (0..arr0.geom_len()) 108 | .into_par_iter() 109 | .map( 110 | |geom_index| match (arr0.geos_value(geom_index)?, arr1.geos_value(geom_index)?) 
{ 111 | (Some(geom0), Some(geom1)) => { 112 | let result = geom0.equals(&geom1).map_err(|e| { 113 | internal_datafusion_err!("Failed to do equals, error: {}", e) 114 | })?; 115 | Ok(Some(result)) 116 | } 117 | _ => Ok(None), 118 | }, 119 | ) 120 | .collect::>>>()?; 121 | Ok(ColumnarValue::Array(Arc::new(BooleanArray::from(bool_vec)))) 122 | } 123 | 124 | #[cfg(test)] 125 | mod tests { 126 | use crate::function::{EqualsUdf, GeomFromTextUdf}; 127 | use arrow::util::pretty::pretty_format_batches; 128 | use datafusion::prelude::SessionContext; 129 | use datafusion_expr::ScalarUDF; 130 | 131 | #[tokio::test] 132 | async fn equals() { 133 | let ctx = SessionContext::new(); 134 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 135 | ctx.register_udf(ScalarUDF::from(EqualsUdf::new())); 136 | let df = ctx 137 | .sql("SELECT ST_Equals(ST_GeomFromText('LINESTRING(0 0, 10 10)'), ST_GeomFromText('LINESTRING(0 0, 5 5, 10 10)'))") 138 | .await 139 | .unwrap(); 140 | assert_eq!( 141 | pretty_format_batches(&df.collect().await.unwrap()) 142 | .unwrap() 143 | .to_string(), 144 | "+-----------------------------------------------------------------------------------------------------------------+ 145 | | ST_Equals(ST_GeomFromText(Utf8(\"LINESTRING(0 0, 10 10)\")),ST_GeomFromText(Utf8(\"LINESTRING(0 0, 5 5, 10 10)\"))) | 146 | +-----------------------------------------------------------------------------------------------------------------+ 147 | | true | 148 | +-----------------------------------------------------------------------------------------------------------------+" 149 | ); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/function/extent.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::{Box2d, GeometryArray}; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{Array, ArrayRef, GenericBinaryArray, OffsetSizeTrait}; 5 | use 
arrow_schema::DataType; 6 | use datafusion_common::ScalarValue; 7 | use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility}; 8 | use geo::BoundingRect; 9 | use std::any::Any; 10 | 11 | // TODO add aliases after datafusion 37.0 released 12 | #[derive(Debug)] 13 | pub struct ExtentUdaf { 14 | signature: Signature, 15 | } 16 | 17 | impl ExtentUdaf { 18 | pub fn new() -> Self { 19 | Self { 20 | signature: Signature::uniform( 21 | 1, 22 | vec![DataType::Binary, DataType::LargeBinary], 23 | Volatility::Immutable, 24 | ), 25 | } 26 | } 27 | } 28 | 29 | impl AggregateUDFImpl for ExtentUdaf { 30 | fn as_any(&self) -> &dyn Any { 31 | self 32 | } 33 | 34 | fn name(&self) -> &str { 35 | // uadf not support alias 36 | "st_extent" 37 | } 38 | 39 | fn signature(&self) -> &Signature { 40 | &self.signature 41 | } 42 | 43 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 44 | Ok(Box2d::data_type()) 45 | } 46 | 47 | fn accumulator(&self, _arg: &DataType) -> datafusion_common::Result> { 48 | Ok(Box::new(ExtentAccumulator::new())) 49 | } 50 | 51 | fn state_type(&self, _return_type: &DataType) -> datafusion_common::Result> { 52 | Ok(vec![Box2d::data_type()]) 53 | } 54 | } 55 | 56 | impl Default for ExtentUdaf { 57 | fn default() -> Self { 58 | Self::new() 59 | } 60 | } 61 | 62 | #[derive(Debug)] 63 | pub struct ExtentAccumulator { 64 | box2d: Box2d, 65 | } 66 | 67 | impl ExtentAccumulator { 68 | pub fn new() -> Self { 69 | Self { 70 | box2d: Box2d::new(), 71 | } 72 | } 73 | } 74 | 75 | impl Accumulator for ExtentAccumulator { 76 | fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { 77 | if values.is_empty() { 78 | return Ok(()); 79 | } 80 | let arr = &values[0]; 81 | match arr.data_type() { 82 | DataType::Binary => { 83 | let wkb_arr = arr.as_binary::(); 84 | let box2d = compute_extent::(wkb_arr)?; 85 | self.box2d = compute_bounding_box2d(self.box2d.clone(), box2d); 86 | } 87 | DataType::LargeBinary => { 88 
| let wkb_arr = arr.as_binary::(); 89 | let box2d = compute_extent::(wkb_arr)?; 90 | self.box2d = compute_bounding_box2d(self.box2d.clone(), box2d); 91 | } 92 | _ => unreachable!(), 93 | } 94 | Ok(()) 95 | } 96 | 97 | fn evaluate(&mut self) -> datafusion_common::Result { 98 | Ok(self.box2d.clone().into()) 99 | } 100 | 101 | fn size(&self) -> usize { 102 | std::mem::size_of_val(self) 103 | } 104 | 105 | fn state(&mut self) -> datafusion_common::Result> { 106 | Ok(vec![self.box2d.clone().into()]) 107 | } 108 | 109 | fn merge_batch(&mut self, states: &[ArrayRef]) -> datafusion_common::Result<()> { 110 | if states.is_empty() { 111 | return Ok(()); 112 | } 113 | let arr = &states[0]; 114 | (0..arr.len()).try_for_each(|index| { 115 | let v = states 116 | .iter() 117 | .map(|array| ScalarValue::try_from_array(array, index)) 118 | .collect::>>()?; 119 | if let ScalarValue::Struct(arr) = &v[0] { 120 | if let Some(box2d) = Box2d::value(arr, 0)? { 121 | self.box2d = compute_bounding_box2d(self.box2d.clone(), box2d); 122 | } 123 | } else { 124 | unreachable!("") 125 | } 126 | Ok(()) 127 | }) 128 | } 129 | } 130 | 131 | fn compute_extent(arr: &GenericBinaryArray) -> DFResult { 132 | let mut box2d = Box2d::new(); 133 | for i in 0..arr.geom_len() { 134 | if let Some(value) = arr 135 | .geo_value(i)? 
136 | .and_then(|geom| geom.bounding_rect().map(Box2d::from)) 137 | { 138 | box2d = compute_bounding_box2d(box2d, value); 139 | } 140 | } 141 | Ok(box2d) 142 | } 143 | 144 | fn compute_bounding_box2d(b0: Box2d, b1: Box2d) -> Box2d { 145 | let xmin = b0.xmin.min(b1.xmin); 146 | let ymin = b0.ymin.min(b1.ymin); 147 | let xmax = b0.xmax.max(b1.xmax); 148 | let ymax = b0.ymax.max(b1.ymax); 149 | Box2d { 150 | xmin, 151 | ymin, 152 | xmax, 153 | ymax, 154 | } 155 | } 156 | 157 | #[cfg(test)] 158 | mod tests { 159 | use crate::function::extent::ExtentUdaf; 160 | use crate::geo::GeometryArrayBuilder; 161 | use arrow::util::pretty::pretty_format_batches; 162 | use arrow_array::{RecordBatch, StringArray}; 163 | use arrow_schema::{DataType, Field, Schema}; 164 | use datafusion::datasource::MemTable; 165 | use datafusion::prelude::SessionContext; 166 | use datafusion_expr::AggregateUDF; 167 | use geo::line_string; 168 | use std::sync::Arc; 169 | 170 | #[tokio::test] 171 | async fn extent() { 172 | let schema = Arc::new(Schema::new(vec![ 173 | Field::new("geom", DataType::Binary, true), 174 | Field::new("name", DataType::Utf8, true), 175 | ])); 176 | 177 | let mut linestrint_vec = vec![]; 178 | for i in 0..4 { 179 | let i = i as f64; 180 | let linestring = line_string![ 181 | (x: i, y: i + 1.0), 182 | (x: i + 2.0, y: i + 3.0), 183 | (x: i + 4.0, y: i + 5.0), 184 | ]; 185 | linestrint_vec.push(Some(linestring)); 186 | } 187 | let builder: GeometryArrayBuilder = linestrint_vec.as_slice().into(); 188 | 189 | let record = RecordBatch::try_new( 190 | schema.clone(), 191 | vec![ 192 | Arc::new(builder.build()), 193 | Arc::new(StringArray::from(vec!["a", "a", "b", "b"])), 194 | ], 195 | ) 196 | .unwrap(); 197 | 198 | let mem_table = MemTable::try_new(schema.clone(), vec![vec![record]]).unwrap(); 199 | 200 | let ctx = SessionContext::new(); 201 | ctx.register_table("geom_table", Arc::new(mem_table)) 202 | .unwrap(); 203 | ctx.register_udaf(AggregateUDF::from(ExtentUdaf::new())); 204 | 
let df = ctx 205 | .sql("select ST_Extent(geom), name from geom_table group by name order by name") 206 | .await 207 | .unwrap(); 208 | assert_eq!( 209 | pretty_format_batches(&df.collect().await.unwrap()) 210 | .unwrap() 211 | .to_string(), 212 | "+----------------------------------------------+------+ 213 | | st_extent(geom_table.geom) | name | 214 | +----------------------------------------------+------+ 215 | | {xmin: 0.0, ymin: 1.0, xmax: 5.0, ymax: 6.0} | a | 216 | | {xmin: 2.0, ymin: 3.0, xmax: 7.0, ymax: 8.0} | b | 217 | +----------------------------------------------+------+" 218 | ); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/function/geom_from_text.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArrayBuilder; 2 | use arrow_array::cast::AsArray; 3 | use arrow_schema::DataType; 4 | use datafusion_common::ScalarValue; 5 | use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError}; 6 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 7 | use geozero::wkb::WkbDialect; 8 | use geozero::{GeozeroGeometry, ToWkb}; 9 | use std::any::Any; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub struct GeomFromTextUdf { 14 | signature: Signature, 15 | aliases: Vec, 16 | } 17 | 18 | impl GeomFromTextUdf { 19 | pub fn new() -> Self { 20 | Self { 21 | signature: Signature::one_of( 22 | vec![ 23 | TypeSignature::Exact(vec![DataType::Utf8]), 24 | TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64]), 25 | ], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_geomfromtext".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for GeomFromTextUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_GeomFromText" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn 
return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 47 | Ok(DataType::Binary) 48 | } 49 | 50 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 51 | let srid = if args.len() == 2 { 52 | let ColumnarValue::Scalar(ScalarValue::Int64(Some(srid))) = &args[1] else { 53 | return internal_err!("The second arg should be int64"); 54 | }; 55 | Some(*srid as i32) 56 | } else { 57 | None 58 | }; 59 | let arr = args[0].clone().into_array(1)?; 60 | let string_arr = arr.as_string::(); 61 | 62 | let mut builder = GeometryArrayBuilder::::new(WkbDialect::Ewkb, 1); 63 | for value in string_arr.iter() { 64 | match value { 65 | None => builder.append_null(), 66 | Some(data) => { 67 | let wkt = geozero::wkt::Wkt(data); 68 | let ewkb = wkt.to_ewkb(wkt.dims(), srid).map_err(|e| { 69 | internal_datafusion_err!("Failed to convert wkt to ewkb, error: {}", e) 70 | })?; 71 | builder.append_wkb(Some(&ewkb))?; 72 | } 73 | } 74 | } 75 | Ok(ColumnarValue::Array(Arc::new(builder.build()))) 76 | } 77 | 78 | fn aliases(&self) -> &[String] { 79 | &self.aliases 80 | } 81 | } 82 | 83 | impl Default for GeomFromTextUdf { 84 | fn default() -> Self { 85 | Self::new() 86 | } 87 | } 88 | 89 | #[cfg(test)] 90 | mod tests { 91 | use crate::function::{AsTextUdf, GeomFromTextUdf}; 92 | use arrow::util::pretty::pretty_format_batches; 93 | use datafusion::logical_expr::ScalarUDF; 94 | use datafusion::prelude::SessionContext; 95 | 96 | #[tokio::test] 97 | async fn geom_from_text() { 98 | let ctx = SessionContext::new(); 99 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 100 | ctx.register_udf(ScalarUDF::from(AsTextUdf::new())); 101 | let df = ctx 102 | .sql("select ST_AsText(ST_GeomFromText('POINT(-71.064544 42.28787)'))") 103 | .await 104 | .unwrap(); 105 | assert_eq!( 106 | pretty_format_batches(&df.collect().await.unwrap()) 107 | .unwrap() 108 | .to_string(), 109 | "+----------------------------------------------------------------+ 110 | | 
ST_AsText(ST_GeomFromText(Utf8(\"POINT(-71.064544 42.28787)\"))) | 111 | +----------------------------------------------------------------+ 112 | | POINT(-71.064544 42.28787) | 113 | +----------------------------------------------------------------+" 114 | ); 115 | } 116 | 117 | #[cfg(feature = "geos")] 118 | #[tokio::test] 119 | async fn geom_from_text_with_srid() { 120 | let ctx = SessionContext::new(); 121 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 122 | ctx.register_udf(ScalarUDF::from(crate::function::AsEwktUdf::new())); 123 | let df = ctx 124 | .sql("select ST_AsEWKT(ST_GeomFromText('POINT(-71.064544 42.28787)', 4269))") 125 | .await 126 | .unwrap(); 127 | assert_eq!( 128 | pretty_format_batches(&df.collect().await.unwrap()) 129 | .unwrap() 130 | .to_string(), 131 | "+----------------------------------------------------------------------------+ 132 | | ST_AsEWKT(ST_GeomFromText(Utf8(\"POINT(-71.064544 42.28787)\"),Int64(4269))) | 133 | +----------------------------------------------------------------------------+ 134 | | SRID=4269;POINT(-71.064544 42.28787) | 135 | +----------------------------------------------------------------------------+" 136 | ); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/function/geom_from_wkb.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArrayBuilder; 2 | use arrow_array::cast::AsArray; 3 | use arrow_schema::DataType; 4 | use datafusion_common::ScalarValue; 5 | use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError}; 6 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 7 | use geozero::wkb::WkbDialect; 8 | use geozero::{GeozeroGeometry, ToWkb}; 9 | use std::any::Any; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub struct GeomFromWkbUdf { 14 | signature: Signature, 15 | aliases: Vec, 16 | } 17 | 18 | impl 
GeomFromWkbUdf { 19 | pub fn new() -> Self { 20 | Self { 21 | signature: Signature::one_of( 22 | vec![ 23 | TypeSignature::Exact(vec![DataType::Binary]), 24 | TypeSignature::Exact(vec![DataType::Binary, DataType::Int64]), 25 | ], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_geomfromwkb".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for GeomFromWkbUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_GeomFromWKB" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 47 | Ok(DataType::Binary) 48 | } 49 | 50 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 51 | let srid = if args.len() == 2 { 52 | let ColumnarValue::Scalar(ScalarValue::Int64(Some(srid))) = &args[1] else { 53 | return internal_err!("The second arg should be int32"); 54 | }; 55 | Some(*srid as i32) 56 | } else { 57 | None 58 | }; 59 | let arr = args[0].clone().into_array(1)?; 60 | let binary_arr = arr.as_binary::(); 61 | 62 | let mut builder = GeometryArrayBuilder::::new(WkbDialect::Ewkb, 1); 63 | for value in binary_arr.iter() { 64 | match value { 65 | None => builder.append_null(), 66 | Some(data) => { 67 | let wkb = geozero::wkb::Wkb(data); 68 | let ewkb = wkb.to_ewkb(wkb.dims(), srid).map_err(|e| { 69 | internal_datafusion_err!("Failed to convert wkb to ewkb, error: {}", e) 70 | })?; 71 | builder.append_wkb(Some(&ewkb))?; 72 | } 73 | } 74 | } 75 | Ok(ColumnarValue::Array(Arc::new(builder.build()))) 76 | } 77 | 78 | fn aliases(&self) -> &[String] { 79 | &self.aliases 80 | } 81 | } 82 | 83 | impl Default for GeomFromWkbUdf { 84 | fn default() -> Self { 85 | Self::new() 86 | } 87 | } 88 | 89 | #[cfg(test)] 90 | mod tests { 91 | use crate::function::geom_from_wkb::GeomFromWkbUdf; 92 | use crate::function::AsTextUdf; 93 | use arrow::util::pretty::pretty_format_batches; 94 | use 
datafusion::logical_expr::ScalarUDF; 95 | use datafusion::prelude::SessionContext; 96 | 97 | #[tokio::test] 98 | async fn geom_from_wkb() { 99 | let ctx = SessionContext::new(); 100 | ctx.register_udf(ScalarUDF::from(GeomFromWkbUdf::new())); 101 | ctx.register_udf(ScalarUDF::from(AsTextUdf::new())); 102 | let df = ctx 103 | .sql("select ST_AsText(ST_GeomFromWKB(0x0101000000cb49287d21c451c0f0bf95ecd8244540))") 104 | .await 105 | .unwrap(); 106 | assert_eq!( 107 | pretty_format_batches(&df.collect().await.unwrap()) 108 | .unwrap() 109 | .to_string(), 110 | "+---------------------------------------------------------------------------------------------------------+ 111 | | ST_AsText(ST_GeomFromWKB(Binary(\"1,1,0,0,0,203,73,40,125,33,196,81,192,240,191,149,236,216,36,69,64\"))) | 112 | +---------------------------------------------------------------------------------------------------------+ 113 | | POINT(-71.064544 42.28787) | 114 | +---------------------------------------------------------------------------------------------------------+" 115 | ); 116 | } 117 | 118 | #[cfg(feature = "geos")] 119 | #[tokio::test] 120 | async fn geom_from_wkb_with_srid() { 121 | let ctx = SessionContext::new(); 122 | ctx.register_udf(ScalarUDF::from(GeomFromWkbUdf::new())); 123 | ctx.register_udf(ScalarUDF::from(crate::function::AsEwktUdf::new())); 124 | let df = ctx 125 | .sql("select ST_AsEWKT(ST_GeomFromWKB(0x0101000000cb49287d21c451c0f0bf95ecd8244540, 4269))") 126 | .await 127 | .unwrap(); 128 | assert_eq!( 129 | pretty_format_batches(&df.collect().await.unwrap()) 130 | .unwrap() 131 | .to_string(), 132 | "+---------------------------------------------------------------------------------------------------------------------+ 133 | | ST_AsEWKT(ST_GeomFromWKB(Binary(\"1,1,0,0,0,203,73,40,125,33,196,81,192,240,191,149,236,216,36,69,64\"),Int64(4269))) | 134 | +---------------------------------------------------------------------------------------------------------------------+ 135 | | 
SRID=4269;POINT(-71.064544 42.28787) | 136 | +---------------------------------------------------------------------------------------------------------------------+" 137 | ); 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/function/geometry_type.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use arrow_array::cast::AsArray; 3 | use arrow_array::{Array, StringArray}; 4 | use arrow_schema::DataType; 5 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; 6 | use std::any::Any; 7 | use std::sync::Arc; 8 | 9 | #[derive(Debug)] 10 | pub struct GeometryTypeUdf { 11 | signature: Signature, 12 | aliases: Vec, 13 | } 14 | 15 | impl GeometryTypeUdf { 16 | pub fn new() -> Self { 17 | Self { 18 | signature: Signature::uniform( 19 | 1, 20 | vec![DataType::Binary, DataType::LargeBinary], 21 | Volatility::Immutable, 22 | ), 23 | aliases: vec!["st_geometrytype".to_string()], 24 | } 25 | } 26 | } 27 | 28 | impl ScalarUDFImpl for GeometryTypeUdf { 29 | fn as_any(&self) -> &dyn Any { 30 | self 31 | } 32 | 33 | fn name(&self) -> &str { 34 | "ST_GeometryType" 35 | } 36 | 37 | fn signature(&self) -> &Signature { 38 | &self.signature 39 | } 40 | 41 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 42 | Ok(DataType::Utf8) 43 | } 44 | 45 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 46 | let arr = args[0].clone().into_array(1)?; 47 | match arr.data_type() { 48 | DataType::Binary => { 49 | let wkb_arr = arr.as_binary::(); 50 | let mut type_vec = vec![]; 51 | for i in 0..wkb_arr.geom_len() { 52 | type_vec.push(wkb_arr.geo_value(i)?.map(geometry_type)); 53 | } 54 | Ok(ColumnarValue::Array(Arc::new(StringArray::from(type_vec)))) 55 | } 56 | DataType::LargeBinary => { 57 | let wkb_arr = arr.as_binary::(); 58 | let mut type_vec = vec![]; 59 | for i in 0..wkb_arr.geom_len() { 60 | 
type_vec.push(wkb_arr.geo_value(i)?.map(geometry_type)); 61 | } 62 | Ok(ColumnarValue::Array(Arc::new(StringArray::from(type_vec)))) 63 | } 64 | _ => unreachable!(), 65 | } 66 | } 67 | 68 | fn aliases(&self) -> &[String] { 69 | &self.aliases 70 | } 71 | } 72 | 73 | impl Default for GeometryTypeUdf { 74 | fn default() -> Self { 75 | Self::new() 76 | } 77 | } 78 | 79 | fn geometry_type(geom: geo::Geometry) -> &'static str { 80 | match geom { 81 | geo::Geometry::Point(_) => "ST_Point", 82 | geo::Geometry::Line(_) => "ST_Line", 83 | geo::Geometry::LineString(_) => "ST_LineString", 84 | geo::Geometry::Polygon(_) => "ST_Polygon", 85 | geo::Geometry::MultiPoint(_) => "ST_MultiPoint", 86 | geo::Geometry::MultiLineString(_) => "ST_MultiLineString", 87 | geo::Geometry::MultiPolygon(_) => "ST_MultiPolygon", 88 | geo::Geometry::GeometryCollection(_) => "ST_GeometryCollection", 89 | geo::Geometry::Rect(_) => "ST_Rect", 90 | geo::Geometry::Triangle(_) => "ST_Triangle", 91 | } 92 | } 93 | 94 | #[cfg(test)] 95 | mod tests { 96 | use crate::function::geometry_type::GeometryTypeUdf; 97 | use crate::function::GeomFromTextUdf; 98 | use arrow::util::pretty::pretty_format_batches; 99 | use datafusion::logical_expr::ScalarUDF; 100 | use datafusion::prelude::SessionContext; 101 | 102 | #[tokio::test] 103 | async fn geometry_type() { 104 | let ctx = SessionContext::new(); 105 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 106 | ctx.register_udf(ScalarUDF::from(GeometryTypeUdf::new())); 107 | let df = ctx 108 | .sql("select ST_GeometryType(ST_GeomFromText('POINT(1 1)'))") 109 | .await 110 | .unwrap(); 111 | assert_eq!( 112 | pretty_format_batches(&df.collect().await.unwrap()) 113 | .unwrap() 114 | .to_string(), 115 | "+------------------------------------------------------+ 116 | | ST_GeometryType(ST_GeomFromText(Utf8(\"POINT(1 1)\"))) | 117 | +------------------------------------------------------+ 118 | | ST_Point | 119 | 
+------------------------------------------------------+" 120 | ); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/function/intersects.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{Array, BooleanArray, GenericBinaryArray, OffsetSizeTrait}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; 8 | use rayon::prelude::*; 9 | use std::any::Any; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub struct IntersectsUdf { 14 | signature: Signature, 15 | aliases: Vec, 16 | } 17 | 18 | impl IntersectsUdf { 19 | pub fn new() -> Self { 20 | Self { 21 | signature: Signature::uniform( 22 | 2, 23 | vec![DataType::Binary, DataType::LargeBinary], 24 | Volatility::Immutable, 25 | ), 26 | aliases: vec!["st_intersects".to_string()], 27 | } 28 | } 29 | } 30 | 31 | impl ScalarUDFImpl for IntersectsUdf { 32 | fn as_any(&self) -> &dyn Any { 33 | self 34 | } 35 | 36 | fn name(&self) -> &str { 37 | "ST_Intersects" 38 | } 39 | 40 | fn signature(&self) -> &Signature { 41 | &self.signature 42 | } 43 | 44 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 45 | Ok(DataType::Boolean) 46 | } 47 | 48 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 49 | let (arr0, arr1) = match (args[0].clone(), args[1].clone()) { 50 | (ColumnarValue::Array(arr0), ColumnarValue::Array(arr1)) => (arr0, arr1), 51 | (ColumnarValue::Array(arr0), ColumnarValue::Scalar(scalar)) => { 52 | (arr0.clone(), scalar.to_array_of_size(arr0.len())?) 
53 | } 54 | (ColumnarValue::Scalar(scalar), ColumnarValue::Array(arr1)) => { 55 | (scalar.to_array_of_size(arr1.len())?, arr1) 56 | } 57 | (ColumnarValue::Scalar(scalar0), ColumnarValue::Scalar(scalar1)) => { 58 | (scalar0.to_array_of_size(1)?, scalar1.to_array_of_size(1)?) 59 | } 60 | }; 61 | if arr0.len() != arr1.len() { 62 | return internal_err!("Two arrays length is not same"); 63 | } 64 | match (arr0.data_type(), arr1.data_type()) { 65 | (DataType::Binary, DataType::Binary) => { 66 | let arr0 = arr0.as_binary::(); 67 | let arr1 = arr1.as_binary::(); 68 | intersects::(arr0, arr1) 69 | } 70 | (DataType::LargeBinary, DataType::Binary) => { 71 | let arr0 = arr0.as_binary::(); 72 | let arr1 = arr1.as_binary::(); 73 | intersects::(arr0, arr1) 74 | } 75 | (DataType::Binary, DataType::LargeBinary) => { 76 | let arr0 = arr0.as_binary::(); 77 | let arr1 = arr1.as_binary::(); 78 | intersects::(arr0, arr1) 79 | } 80 | (DataType::LargeBinary, DataType::LargeBinary) => { 81 | let arr0 = arr0.as_binary::(); 82 | let arr1 = arr1.as_binary::(); 83 | intersects::(arr0, arr1) 84 | } 85 | _ => unreachable!(), 86 | } 87 | } 88 | 89 | fn aliases(&self) -> &[String] { 90 | &self.aliases 91 | } 92 | } 93 | 94 | impl Default for IntersectsUdf { 95 | fn default() -> Self { 96 | Self::new() 97 | } 98 | } 99 | 100 | fn intersects( 101 | arr0: &GenericBinaryArray, 102 | arr1: &GenericBinaryArray, 103 | ) -> DFResult { 104 | let bool_vec = (0..arr0.geom_len()) 105 | .into_par_iter() 106 | .map(|geom_index| { 107 | #[cfg(feature = "geos")] 108 | { 109 | use datafusion_common::internal_datafusion_err; 110 | use geos::Geom; 111 | match (arr0.geos_value(geom_index)?, arr1.geos_value(geom_index)?) 
{ 112 | (Some(geom0), Some(geom1)) => { 113 | let result = geom0.intersects(&geom1).map_err(|e| { 114 | internal_datafusion_err!("Failed to do intersects, error: {}", e) 115 | })?; 116 | Ok(Some(result)) 117 | } 118 | _ => Ok(None), 119 | } 120 | } 121 | #[cfg(not(feature = "geos"))] 122 | { 123 | use geo::Intersects; 124 | match (arr0.geo_value(geom_index)?, arr1.geo_value(geom_index)?) { 125 | (Some(geom0), Some(geom1)) => Ok(Some(geom0.intersects(&geom1))), 126 | _ => Ok(None), 127 | } 128 | } 129 | }) 130 | .collect::>>>()?; 131 | Ok(ColumnarValue::Array(Arc::new(BooleanArray::from(bool_vec)))) 132 | } 133 | 134 | #[cfg(test)] 135 | mod tests { 136 | use crate::function::{GeomFromTextUdf, IntersectsUdf}; 137 | use crate::geo::GeometryArrayBuilder; 138 | use arrow::util::pretty::pretty_format_batches; 139 | use arrow_array::RecordBatch; 140 | use arrow_schema::{DataType, Field, Schema}; 141 | use datafusion::datasource::MemTable; 142 | use datafusion::logical_expr::ScalarUDF; 143 | use datafusion::prelude::SessionContext; 144 | use geo::line_string; 145 | use std::sync::Arc; 146 | 147 | #[tokio::test] 148 | async fn intersects() { 149 | let ctx = SessionContext::new(); 150 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 151 | ctx.register_udf(ScalarUDF::from(IntersectsUdf::new())); 152 | let df = ctx 153 | .sql("select ST_Intersects(ST_GeomFromText('POINT(1 1)'), ST_GeomFromText('LINESTRING ( 1 1, 0 2 )'))") 154 | .await 155 | .unwrap(); 156 | assert_eq!( 157 | pretty_format_batches(&df.collect().await.unwrap()) 158 | .unwrap() 159 | .to_string(), 160 | "+-----------------------------------------------------------------------------------------------------+ 161 | | ST_Intersects(ST_GeomFromText(Utf8(\"POINT(1 1)\")),ST_GeomFromText(Utf8(\"LINESTRING ( 1 1, 0 2 )\"))) | 162 | +-----------------------------------------------------------------------------------------------------+ 163 | | true | 164 | 
+-----------------------------------------------------------------------------------------------------+" 165 | ); 166 | } 167 | 168 | #[tokio::test] 169 | async fn intersects_table() { 170 | let ctx = SessionContext::new(); 171 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 172 | ctx.register_udf(ScalarUDF::from(IntersectsUdf::new())); 173 | 174 | let schema = Arc::new(Schema::new(vec![Field::new( 175 | "geom", 176 | DataType::Binary, 177 | true, 178 | )])); 179 | 180 | let mut linestrint_vec = vec![]; 181 | for i in 0..3 { 182 | let i = i as f64; 183 | let linestring = line_string![ 184 | (x: i, y: i + 1.0), 185 | (x: i + 2.0, y: i + 3.0), 186 | (x: i + 4.0, y: i + 5.0), 187 | ]; 188 | linestrint_vec.push(Some(linestring)); 189 | } 190 | let builder: GeometryArrayBuilder = linestrint_vec.as_slice().into(); 191 | let record = RecordBatch::try_new(schema.clone(), vec![Arc::new(builder.build())]).unwrap(); 192 | 193 | let mem_table = 194 | MemTable::try_new(schema.clone(), vec![vec![record.clone()], vec![record]]).unwrap(); 195 | ctx.register_table("geom_table", Arc::new(mem_table)) 196 | .unwrap(); 197 | 198 | let df = ctx 199 | .sql("select ST_Intersects(geom, ST_GeomFromText('POINT(0 1)')) from geom_table") 200 | .await 201 | .unwrap(); 202 | assert_eq!( 203 | pretty_format_batches(&df.collect().await.unwrap()) 204 | .unwrap() 205 | .to_string(), 206 | "+--------------------------------------------------------------------+ 207 | | ST_Intersects(geom_table.geom,ST_GeomFromText(Utf8(\"POINT(0 1)\"))) | 208 | +--------------------------------------------------------------------+ 209 | | true | 210 | | false | 211 | | false | 212 | | true | 213 | | false | 214 | | false | 215 | +--------------------------------------------------------------------+" 216 | ); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/function/make_envelope.rs: 
-------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArrayBuilder; 2 | use arrow_schema::DataType; 3 | use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError, ScalarValue}; 4 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility}; 5 | use geos::CoordSeq; 6 | use geozero::wkb::WkbDialect; 7 | use std::any::Any; 8 | use std::sync::Arc; 9 | 10 | #[derive(Debug)] 11 | pub struct MakeEnvelopeUdf { 12 | signature: Signature, 13 | aliases: Vec, 14 | } 15 | 16 | impl MakeEnvelopeUdf { 17 | pub fn new() -> Self { 18 | Self { 19 | signature: Signature::one_of( 20 | vec![ 21 | TypeSignature::Exact(vec![ 22 | DataType::Float64, 23 | DataType::Float64, 24 | DataType::Float64, 25 | DataType::Float64, 26 | ]), 27 | TypeSignature::Exact(vec![ 28 | DataType::Float64, 29 | DataType::Float64, 30 | DataType::Float64, 31 | DataType::Float64, 32 | DataType::Int64, 33 | ]), 34 | ], 35 | Volatility::Immutable, 36 | ), 37 | aliases: vec!["st_makeenvelope".to_string()], 38 | } 39 | } 40 | } 41 | 42 | impl ScalarUDFImpl for MakeEnvelopeUdf { 43 | fn as_any(&self) -> &dyn Any { 44 | self 45 | } 46 | 47 | fn name(&self) -> &str { 48 | "ST_MakeEnvelope" 49 | } 50 | 51 | fn signature(&self) -> &Signature { 52 | &self.signature 53 | } 54 | 55 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 56 | Ok(DataType::Binary) 57 | } 58 | 59 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 60 | let error = internal_err!("The arg should be float64"); 61 | let ColumnarValue::Scalar(ScalarValue::Float64(Some(xmin))) = args[0] else { 62 | return error; 63 | }; 64 | let ColumnarValue::Scalar(ScalarValue::Float64(Some(ymin))) = args[1] else { 65 | return error; 66 | }; 67 | let ColumnarValue::Scalar(ScalarValue::Float64(Some(xmax))) = args[2] else { 68 | return error; 69 | }; 70 | let ColumnarValue::Scalar(ScalarValue::Float64(Some(ymax))) = 
args[3] else { 71 | return error; 72 | }; 73 | let srid = if args.len() == 5 { 74 | let ColumnarValue::Scalar(ScalarValue::Int64(Some(srid))) = args[4] else { 75 | return internal_err!("The fifth arg should be int64"); 76 | }; 77 | Some(srid) 78 | } else { 79 | None 80 | }; 81 | 82 | let coords = CoordSeq::new_from_vec(&[ 83 | &[xmin, ymin], 84 | &[xmin, ymax], 85 | &[xmax, ymax], 86 | &[xmax, ymin], 87 | &[xmin, ymin], 88 | ]) 89 | .map_err(|_| internal_datafusion_err!("Failed to create coord req"))?; 90 | let exterior = geos::Geometry::create_linear_ring(coords) 91 | .map_err(|_| internal_datafusion_err!("Failed to create exterior"))?; 92 | let mut polygon = geos::Geometry::create_polygon(exterior, vec![]) 93 | .map_err(|_| internal_datafusion_err!("Failed to create polygon"))?; 94 | 95 | let mut builder = if let Some(srid) = srid { 96 | polygon.set_srid(srid as usize); 97 | GeometryArrayBuilder::::new(WkbDialect::Ewkb, 1) 98 | } else { 99 | GeometryArrayBuilder::::new(WkbDialect::Wkb, 1) 100 | }; 101 | builder.append_geos_geometry(&Some(polygon))?; 102 | 103 | let wkb_arr = builder.build(); 104 | Ok(ColumnarValue::Array(Arc::new(wkb_arr))) 105 | } 106 | 107 | fn aliases(&self) -> &[String] { 108 | &self.aliases 109 | } 110 | } 111 | 112 | impl Default for MakeEnvelopeUdf { 113 | fn default() -> Self { 114 | Self::new() 115 | } 116 | } 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | use crate::function::{AsEwktUdf, MakeEnvelopeUdf}; 121 | use arrow::util::pretty::pretty_format_batches; 122 | use datafusion::logical_expr::ScalarUDF; 123 | use datafusion::prelude::SessionContext; 124 | 125 | #[tokio::test] 126 | async fn make_envelope() { 127 | let ctx = SessionContext::new(); 128 | ctx.register_udf(ScalarUDF::from(MakeEnvelopeUdf::new())); 129 | ctx.register_udf(ScalarUDF::from(AsEwktUdf::new())); 130 | let df = ctx 131 | .sql("select ST_AsEWKT(ST_MakeEnvelope(10, 10, 11, 11))") 132 | .await 133 | .unwrap(); 134 | assert_eq!( 135 | 
pretty_format_batches(&df.collect().await.unwrap()) 136 | .unwrap() 137 | .to_string(), 138 | "+---------------------------------------------------------------------+ 139 | | ST_AsEWKT(ST_MakeEnvelope(Int64(10),Int64(10),Int64(11),Int64(11))) | 140 | +---------------------------------------------------------------------+ 141 | | POLYGON((10 10,10 11,11 11,11 10,10 10)) | 142 | +---------------------------------------------------------------------+" 143 | ); 144 | 145 | let df = ctx 146 | .sql("select ST_AsEWKT(ST_MakeEnvelope(10, 10, 11, 11, 4236))") 147 | .await 148 | .unwrap(); 149 | assert_eq!( 150 | pretty_format_batches(&df.collect().await.unwrap()) 151 | .unwrap() 152 | .to_string(), 153 | "+---------------------------------------------------------------------------------+ 154 | | ST_AsEWKT(ST_MakeEnvelope(Int64(10),Int64(10),Int64(11),Int64(11),Int64(4236))) | 155 | +---------------------------------------------------------------------------------+ 156 | | SRID=4236;POLYGON((10 10,10 11,11 11,11 10,10 10)) | 157 | +---------------------------------------------------------------------------------+" 158 | ); 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/function/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "geos")] 2 | mod as_ewkt; 3 | mod as_geojson; 4 | mod as_mvt_geom; 5 | mod as_text; 6 | #[cfg(feature = "geos")] 7 | mod boundary; 8 | mod box2d; 9 | #[cfg(feature = "geos")] 10 | mod buffer; 11 | #[cfg(feature = "geos")] 12 | mod covered_by; 13 | #[cfg(feature = "geos")] 14 | mod covers; 15 | #[cfg(feature = "geos")] 16 | mod equals; 17 | mod extent; 18 | mod geom_from_text; 19 | mod geom_from_wkb; 20 | mod geometry_type; 21 | mod intersects; 22 | #[cfg(feature = "geos")] 23 | mod make_envelope; 24 | #[cfg(feature = "geos")] 25 | mod split; 26 | #[cfg(feature = "geos")] 27 | mod srid; 28 | mod translate; 29 | 30 | #[cfg(feature = 
"geos")] 31 | pub use as_ewkt::*; 32 | pub use as_geojson::*; 33 | pub use as_text::*; 34 | #[cfg(feature = "geos")] 35 | pub use boundary::*; 36 | #[cfg(feature = "geos")] 37 | pub use buffer::*; 38 | #[cfg(feature = "geos")] 39 | pub use covered_by::*; 40 | #[cfg(feature = "geos")] 41 | pub use covers::*; 42 | #[cfg(feature = "geos")] 43 | pub use equals::*; 44 | pub use geom_from_text::*; 45 | pub use geometry_type::*; 46 | pub use intersects::*; 47 | #[cfg(feature = "geos")] 48 | pub use make_envelope::*; 49 | #[cfg(feature = "geos")] 50 | pub use split::*; 51 | #[cfg(feature = "geos")] 52 | pub use srid::*; 53 | pub use translate::*; 54 | -------------------------------------------------------------------------------- /src/function/split.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::{GeometryArray, GeometryArrayBuilder}; 2 | use crate::DFResult; 3 | use arrow_array::cast::AsArray; 4 | use arrow_array::{GenericBinaryArray, OffsetSizeTrait}; 5 | use arrow_schema::DataType; 6 | use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError}; 7 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; 8 | use geos::Geom; 9 | use rayon::iter::IntoParallelIterator; 10 | use rayon::prelude::*; 11 | use std::any::Any; 12 | use std::sync::Arc; 13 | 14 | #[derive(Debug)] 15 | pub struct SplitUdf { 16 | signature: Signature, 17 | aliases: Vec, 18 | } 19 | 20 | impl SplitUdf { 21 | pub fn new() -> Self { 22 | Self { 23 | signature: Signature::uniform( 24 | 2, 25 | vec![DataType::Binary, DataType::LargeBinary], 26 | Volatility::Immutable, 27 | ), 28 | aliases: vec!["st_split".to_string()], 29 | } 30 | } 31 | } 32 | 33 | impl ScalarUDFImpl for SplitUdf { 34 | fn as_any(&self) -> &dyn Any { 35 | self 36 | } 37 | 38 | fn name(&self) -> &str { 39 | "ST_Split" 40 | } 41 | 42 | fn signature(&self) -> &Signature { 43 | &self.signature 44 | } 45 | 46 | fn return_type(&self, arg_types: 
&[DataType]) -> datafusion_common::Result { 47 | Ok(arg_types[0].clone()) 48 | } 49 | 50 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 51 | let (arr0, arr1) = match (args[0].clone(), args[1].clone()) { 52 | (ColumnarValue::Array(arr0), ColumnarValue::Array(arr1)) => (arr0, arr1), 53 | (ColumnarValue::Array(arr0), ColumnarValue::Scalar(scalar)) => { 54 | (arr0.clone(), scalar.to_array_of_size(arr0.len())?) 55 | } 56 | (ColumnarValue::Scalar(scalar), ColumnarValue::Array(arr1)) => { 57 | (scalar.to_array_of_size(arr1.len())?, arr1) 58 | } 59 | (ColumnarValue::Scalar(scalar0), ColumnarValue::Scalar(scalar1)) => { 60 | (scalar0.to_array_of_size(1)?, scalar1.to_array_of_size(1)?) 61 | } 62 | }; 63 | if arr0.len() != arr1.len() { 64 | return internal_err!("Two arrays length is not same"); 65 | } 66 | 67 | match (arr0.data_type(), arr1.data_type()) { 68 | (DataType::Binary, DataType::Binary) => { 69 | let arr0 = arr0.as_binary::(); 70 | let arr1 = arr1.as_binary::(); 71 | split::(arr0, arr1) 72 | } 73 | (DataType::LargeBinary, DataType::Binary) => { 74 | let arr0 = arr0.as_binary::(); 75 | let arr1 = arr1.as_binary::(); 76 | split::(arr0, arr1) 77 | } 78 | (DataType::Binary, DataType::LargeBinary) => { 79 | let arr0 = arr0.as_binary::(); 80 | let arr1 = arr1.as_binary::(); 81 | split::(arr0, arr1) 82 | } 83 | (DataType::LargeBinary, DataType::LargeBinary) => { 84 | let arr0 = arr0.as_binary::(); 85 | let arr1 = arr1.as_binary::(); 86 | split::(arr0, arr1) 87 | } 88 | _ => unreachable!(), 89 | } 90 | } 91 | 92 | fn aliases(&self) -> &[String] { 93 | &self.aliases 94 | } 95 | } 96 | 97 | impl Default for SplitUdf { 98 | fn default() -> Self { 99 | Self::new() 100 | } 101 | } 102 | 103 | fn split( 104 | arr0: &GenericBinaryArray, 105 | arr1: &GenericBinaryArray, 106 | ) -> DFResult { 107 | let geom_vec = (0..arr0.geom_len()) 108 | .into_par_iter() 109 | .map( 110 | |geom_index| match (arr0.geos_value(geom_index)?, arr1.geos_value(geom_index)?) 
{ 111 | (Some(geom0), Some(geom1)) => { 112 | let boundary = geom0.boundary().map_err(|e| { 113 | internal_datafusion_err!("Failed to do boundary, error: {}", e) 114 | })?; 115 | let union = boundary.union(&geom1).map_err(|e| { 116 | internal_datafusion_err!("Failed to do union, error: {}", e) 117 | })?; 118 | let (result, ..) = union.polygonize_full().map_err(|e| { 119 | internal_datafusion_err!("Failed to do polygonize_full, error: {}", e) 120 | })?; 121 | 122 | Ok(Some(result)) 123 | } 124 | _ => Ok(None), 125 | }, 126 | ) 127 | .collect::>>>()?; 128 | let builder = GeometryArrayBuilder::::from(geom_vec.as_slice()); 129 | Ok(ColumnarValue::Array(Arc::new(builder.build()))) 130 | } 131 | 132 | #[cfg(test)] 133 | mod tests { 134 | use crate::function::{GeomFromTextUdf, SplitUdf}; 135 | // use arrow::util::pretty::pretty_format_batches; 136 | use datafusion::prelude::SessionContext; 137 | use datafusion_expr::ScalarUDF; 138 | 139 | #[tokio::test] 140 | async fn split() { 141 | let ctx = SessionContext::new(); 142 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 143 | ctx.register_udf(ScalarUDF::from(SplitUdf::new())); 144 | let _df = ctx 145 | .sql("select ST_Split(ST_GeomFromText('LINESTRING ( 0 0, 1 1, 2 2 )'), ST_GeomFromText('POINT(1 1)'))") 146 | .await 147 | .unwrap(); 148 | // FIXME fix split implementation 149 | // assert_eq!( 150 | // pretty_format_batches(&df.collect().await.unwrap()) 151 | // .unwrap() 152 | // .to_string(), 153 | // "" 154 | // ); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/function/srid.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::GeometryArray; 2 | use arrow_array::cast::AsArray; 3 | use arrow_array::{Array, Int32Array}; 4 | use arrow_schema::DataType; 5 | use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; 6 | use geozero::GeozeroGeometry; 7 | use std::any::Any; 8 | use 
std::sync::Arc; 9 | 10 | #[derive(Debug)] 11 | pub struct SridUdf { 12 | signature: Signature, 13 | aliases: Vec, 14 | } 15 | 16 | impl SridUdf { 17 | pub fn new() -> Self { 18 | Self { 19 | signature: Signature::uniform( 20 | 1, 21 | vec![DataType::Binary, DataType::LargeBinary], 22 | Volatility::Immutable, 23 | ), 24 | aliases: vec!["st_srid".to_string()], 25 | } 26 | } 27 | } 28 | 29 | impl ScalarUDFImpl for SridUdf { 30 | fn as_any(&self) -> &dyn Any { 31 | self 32 | } 33 | 34 | fn name(&self) -> &str { 35 | "ST_SRID" 36 | } 37 | 38 | fn signature(&self) -> &Signature { 39 | &self.signature 40 | } 41 | 42 | fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result { 43 | Ok(DataType::Int32) 44 | } 45 | 46 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 47 | let arr = args[0].clone().into_array(1)?; 48 | match arr.data_type() { 49 | DataType::Binary => { 50 | let wkb_arr = arr.as_binary::(); 51 | let mut srid_vec = vec![]; 52 | for i in 0..wkb_arr.geom_len() { 53 | srid_vec.push(wkb_arr.geos_value(i)?.and_then(|geom| geom.srid())); 54 | } 55 | Ok(ColumnarValue::Array(Arc::new(Int32Array::from(srid_vec)))) 56 | } 57 | DataType::LargeBinary => { 58 | let wkb_arr = arr.as_binary::(); 59 | let mut srid_vec = vec![]; 60 | for i in 0..wkb_arr.geom_len() { 61 | srid_vec.push(wkb_arr.geos_value(i)?.and_then(|geom| geom.srid())); 62 | } 63 | Ok(ColumnarValue::Array(Arc::new(Int32Array::from(srid_vec)))) 64 | } 65 | _ => unreachable!(), 66 | } 67 | } 68 | 69 | fn aliases(&self) -> &[String] { 70 | &self.aliases 71 | } 72 | } 73 | 74 | impl Default for SridUdf { 75 | fn default() -> Self { 76 | Self::new() 77 | } 78 | } 79 | 80 | #[cfg(test)] 81 | mod tests { 82 | use crate::function::{GeomFromTextUdf, SridUdf}; 83 | use arrow::util::pretty::pretty_format_batches; 84 | use datafusion::logical_expr::ScalarUDF; 85 | use datafusion::prelude::SessionContext; 86 | 87 | #[tokio::test] 88 | async fn srid() { 89 | let ctx = 
impl TranslateUdf {
    /// Builds the `ST_Translate` UDF definition.
    ///
    /// Two exact signatures are accepted, differing only in the binary
    /// width of the geometry argument:
    /// `(Binary, Float64, Float64)` and `(LargeBinary, Float64, Float64)`.
    /// The function is registered as immutable and carries the lowercase
    /// alias `st_translate`.
    pub fn new() -> Self {
        Self {
            signature: Signature::one_of(
                vec![
                    TypeSignature::Exact(vec![
                        DataType::Binary,
                        DataType::Float64,
                        DataType::Float64,
                    ]),
                    TypeSignature::Exact(vec![
                        DataType::LargeBinary,
                        DataType::Float64,
                        DataType::Float64,
                    ]),
                ],
                Volatility::Immutable,
            ),
            aliases: vec!["st_translate".to_string()],
        }
    }
}
&Signature { 49 | &self.signature 50 | } 51 | 52 | fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { 53 | Ok(arg_types[0].clone()) 54 | } 55 | 56 | fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { 57 | let ColumnarValue::Scalar(ScalarValue::Float64(Some(x_offset))) = args[1] else { 58 | return internal_err!("The second arg should be f64 scalar"); 59 | }; 60 | let ColumnarValue::Scalar(ScalarValue::Float64(Some(y_offset))) = args[2] else { 61 | return internal_err!("The third arg should be f64 scalar"); 62 | }; 63 | 64 | match args[0].data_type() { 65 | DataType::Binary => { 66 | let arr = args[0].clone().into_array(1)?; 67 | let wkb_arr = arr.as_binary::(); 68 | 69 | let mut geom_vec = vec![]; 70 | for i in 0..wkb_arr.geom_len() { 71 | geom_vec.push( 72 | wkb_arr 73 | .geo_value(i)? 74 | .map(|geom| geom.translate(x_offset, y_offset)), 75 | ); 76 | } 77 | 78 | let builder: GeometryArrayBuilder = geom_vec.as_slice().into(); 79 | Ok(ColumnarValue::Array(Arc::new(builder.build()))) 80 | } 81 | DataType::LargeBinary => { 82 | let arr = args[0].clone().into_array(0)?; 83 | let wkb_arr = arr.as_binary::(); 84 | 85 | let mut geom_vec = vec![]; 86 | for i in 0..wkb_arr.geom_len() { 87 | geom_vec.push( 88 | wkb_arr 89 | .geo_value(i)? 
90 | .map(|geom| geom.translate(x_offset, y_offset)), 91 | ); 92 | } 93 | let builder: GeometryArrayBuilder = geom_vec.as_slice().into(); 94 | Ok(ColumnarValue::Array(Arc::new(builder.build()))) 95 | } 96 | _ => unreachable!(), 97 | } 98 | } 99 | 100 | fn aliases(&self) -> &[String] { 101 | &self.aliases 102 | } 103 | } 104 | 105 | impl Default for TranslateUdf { 106 | fn default() -> Self { 107 | Self::new() 108 | } 109 | } 110 | 111 | #[cfg(test)] 112 | mod tests { 113 | use crate::function::{AsTextUdf, GeomFromTextUdf, TranslateUdf}; 114 | use arrow::util::pretty::pretty_format_batches; 115 | use datafusion::logical_expr::ScalarUDF; 116 | use datafusion::prelude::SessionContext; 117 | 118 | #[tokio::test] 119 | async fn translate() { 120 | let ctx = SessionContext::new(); 121 | ctx.register_udf(ScalarUDF::from(GeomFromTextUdf::new())); 122 | ctx.register_udf(ScalarUDF::from(TranslateUdf::new())); 123 | ctx.register_udf(ScalarUDF::from(AsTextUdf::new())); 124 | let df = ctx 125 | .sql("select ST_AsText(ST_Translate(ST_GeomFromText('POINT(-71.064544 42.28787)'), 1.0, 2.0))") 126 | .await 127 | .unwrap(); 128 | assert_eq!( 129 | pretty_format_batches(&df.collect().await.unwrap()) 130 | .unwrap() 131 | .to_string(), 132 | "+----------------------------------------------------------------------------------------------------+ 133 | | ST_AsText(ST_Translate(ST_GeomFromText(Utf8(\"POINT(-71.064544 42.28787)\")),Float64(1),Float64(2))) | 134 | +----------------------------------------------------------------------------------------------------+ 135 | | POINT(-70.064544 44.28787) | 136 | +----------------------------------------------------------------------------------------------------+" 137 | ); 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/geo/array.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::dialect::decode_wkb_dialect; 2 | use crate::DFResult; 3 
| use arrow_array::types::GenericBinaryType; 4 | use arrow_array::{Array, GenericByteArray, OffsetSizeTrait}; 5 | use datafusion_common::{internal_datafusion_err, DataFusionError}; 6 | use geozero::wkb::FromWkb; 7 | 8 | pub trait GeometryArray { 9 | fn geom_len(&self) -> usize; 10 | 11 | fn wkb(&self, geom_index: usize) -> Option<&[u8]>; 12 | 13 | fn geo_value(&self, geom_index: usize) -> DFResult> { 14 | if let Some(wkb) = self.wkb(geom_index) { 15 | let dialect = decode_wkb_dialect(wkb[0])?; 16 | let mut rdr = std::io::Cursor::new(&wkb[1..]); 17 | let value = geo::Geometry::from_wkb(&mut rdr, dialect) 18 | .map_err(|e| internal_datafusion_err!("Failed to parse wkb, error: {}", e))?; 19 | Ok(Some(value)) 20 | } else { 21 | Ok(None) 22 | } 23 | } 24 | 25 | #[cfg(feature = "geos")] 26 | fn geos_value(&self, geom_index: usize) -> DFResult> { 27 | if let Some(wkb) = self.wkb(geom_index) { 28 | let dialect = decode_wkb_dialect(wkb[0])?; 29 | let mut rdr = std::io::Cursor::new(&wkb[1..]); 30 | let value = geos::Geometry::from_wkb(&mut rdr, dialect) 31 | .map_err(|e| internal_datafusion_err!("Failed to parse wkb, error: {}", e))?; 32 | Ok(Some(value)) 33 | } else { 34 | Ok(None) 35 | } 36 | } 37 | } 38 | 39 | impl GeometryArray for GenericByteArray> { 40 | fn geom_len(&self) -> usize { 41 | self.len() 42 | } 43 | 44 | fn wkb(&self, geom_index: usize) -> Option<&[u8]> { 45 | if geom_index >= self.geom_len() || self.is_null(geom_index) { 46 | return None; 47 | } 48 | Some(self.value(geom_index)) 49 | } 50 | } 51 | 52 | #[cfg(test)] 53 | mod tests { 54 | use crate::geo::{GeometryArray, GeometryArrayBuilder}; 55 | use geo::{line_string, point, polygon}; 56 | 57 | #[test] 58 | fn point_array() { 59 | let p0 = point!(x: 0f64, y: 1f64); 60 | let p2 = point!(x: 2f64, y: 3f64); 61 | let builder: GeometryArrayBuilder = vec![Some(p0), None, Some(p2)].as_slice().into(); 62 | let arr = builder.build(); 63 | assert_eq!(arr.geom_len(), 3); 64 | 65 | 
assert_eq!(arr.geo_value(0).unwrap(), Some(geo::Geometry::Point(p0))); 66 | assert_eq!(arr.geo_value(1).unwrap(), None); 67 | assert_eq!(arr.geo_value(2).unwrap(), Some(geo::Geometry::Point(p2))); 68 | assert_eq!(arr.geo_value(3).unwrap(), None); 69 | } 70 | 71 | #[test] 72 | fn linestring_array() { 73 | let ls0 = line_string![ 74 | (x: 0., y: 1.), 75 | (x: 1., y: 2.) 76 | ]; 77 | let ls2 = line_string![ 78 | (x: 3., y: 4.), 79 | (x: 5., y: 6.) 80 | ]; 81 | let builder: GeometryArrayBuilder = vec![Some(ls0.clone()), None, Some(ls2.clone())] 82 | .as_slice() 83 | .into(); 84 | let arr = builder.build(); 85 | assert_eq!(arr.geom_len(), 3); 86 | 87 | assert_eq!( 88 | arr.geo_value(0).unwrap(), 89 | Some(geo::Geometry::LineString(ls0)) 90 | ); 91 | assert_eq!(arr.geo_value(1).unwrap(), None); 92 | assert_eq!( 93 | arr.geo_value(2).unwrap(), 94 | Some(geo::Geometry::LineString(ls2)) 95 | ); 96 | assert_eq!(arr.geo_value(3).unwrap(), None); 97 | } 98 | 99 | #[test] 100 | fn polygon_array() { 101 | let p0 = polygon![ 102 | (x: -111., y: 45.), 103 | (x: -111., y: 41.), 104 | (x: -104., y: 41.), 105 | (x: -104., y: 45.), 106 | ]; 107 | let p2 = polygon!( 108 | exterior: [ 109 | (x: -111., y: 45.), 110 | (x: -111., y: 41.), 111 | (x: -104., y: 41.), 112 | (x: -104., y: 45.), 113 | ], 114 | interiors: [ 115 | [ 116 | (x: -110., y: 44.), 117 | (x: -110., y: 42.), 118 | (x: -105., y: 42.), 119 | (x: -105., y: 44.), 120 | ], 121 | ], 122 | ); 123 | let builder: GeometryArrayBuilder = vec![Some(p0.clone()), None, Some(p2.clone())] 124 | .as_slice() 125 | .into(); 126 | let arr = builder.build(); 127 | assert_eq!(arr.geom_len(), 3); 128 | 129 | assert_eq!(arr.geo_value(0).unwrap(), Some(geo::Geometry::Polygon(p0))); 130 | assert_eq!(arr.geo_value(1).unwrap(), None); 131 | assert_eq!(arr.geo_value(2).unwrap(), Some(geo::Geometry::Polygon(p2))); 132 | assert_eq!(arr.geo_value(3).unwrap(), None); 133 | } 134 | 135 | #[test] 136 | fn multi_point_array() { 137 | let mp0 = 
geo::MultiPoint::new(vec![ 138 | point!( 139 | x: 0., y: 1. 140 | ), 141 | point!( 142 | x: 1., y: 2. 143 | ), 144 | ]); 145 | let mp2 = geo::MultiPoint::new(vec![ 146 | point!( 147 | x: 3., y: 4. 148 | ), 149 | point!( 150 | x: 5., y: 6. 151 | ), 152 | ]); 153 | let builder: GeometryArrayBuilder = vec![Some(mp0.clone()), None, Some(mp2.clone())] 154 | .as_slice() 155 | .into(); 156 | let arr = builder.build(); 157 | assert_eq!(arr.geom_len(), 3); 158 | 159 | assert_eq!( 160 | arr.geo_value(0).unwrap(), 161 | Some(geo::Geometry::MultiPoint(mp0)) 162 | ); 163 | assert_eq!(arr.geo_value(1).unwrap(), None); 164 | assert_eq!( 165 | arr.geo_value(2).unwrap(), 166 | Some(geo::Geometry::MultiPoint(mp2)) 167 | ); 168 | assert_eq!(arr.geo_value(3).unwrap(), None); 169 | } 170 | 171 | #[test] 172 | fn multi_line_string_array() { 173 | let ml0 = geo::MultiLineString::new(vec![line_string![ 174 | (x: -111., y: 45.), 175 | (x: -111., y: 41.), 176 | (x: -104., y: 41.), 177 | (x: -104., y: 45.), 178 | ]]); 179 | let ml2 = geo::MultiLineString::new(vec![ 180 | line_string![ 181 | (x: -111., y: 45.), 182 | (x: -111., y: 41.), 183 | (x: -104., y: 41.), 184 | (x: -104., y: 45.), 185 | ], 186 | line_string![ 187 | (x: -110., y: 44.), 188 | (x: -110., y: 42.), 189 | (x: -105., y: 42.), 190 | (x: -105., y: 44.), 191 | ], 192 | ]); 193 | 194 | let builder: GeometryArrayBuilder = vec![Some(ml0.clone()), None, Some(ml2.clone())] 195 | .as_slice() 196 | .into(); 197 | let arr = builder.build(); 198 | assert_eq!(arr.geom_len(), 3); 199 | 200 | assert_eq!( 201 | arr.geo_value(0).unwrap(), 202 | Some(geo::Geometry::MultiLineString(ml0)) 203 | ); 204 | assert_eq!(arr.geo_value(1).unwrap(), None); 205 | assert_eq!( 206 | arr.geo_value(2).unwrap(), 207 | Some(geo::Geometry::MultiLineString(ml2)) 208 | ); 209 | assert_eq!(arr.geo_value(3).unwrap(), None); 210 | } 211 | 212 | #[test] 213 | fn multi_polygon_array() { 214 | let mp0 = geo::MultiPolygon::new(vec![ 215 | polygon![ 216 | (x: -111., y: 
/// Axis-aligned two-dimensional bounding box, stored as the minimum and
/// maximum coordinate along each axis.
#[derive(Debug, Clone)]
pub struct Box2d {
    /// Smallest x coordinate of the box.
    pub(crate) xmin: f64,
    /// Smallest y coordinate of the box.
    pub(crate) ymin: f64,
    /// Largest x coordinate of the box.
    pub(crate) xmax: f64,
    /// Largest y coordinate of the box.
    pub(crate) ymax: f64,
}
Self { 21 | xmin: f64::MAX, 22 | ymin: f64::MAX, 23 | xmax: f64::MIN, 24 | ymax: f64::MIN, 25 | } 26 | } 27 | pub fn fields() -> Vec { 28 | vec![ 29 | Field::new("xmin", DataType::Float64, false), 30 | Field::new("ymin", DataType::Float64, false), 31 | Field::new("xmax", DataType::Float64, false), 32 | Field::new("ymax", DataType::Float64, false), 33 | ] 34 | } 35 | pub fn data_type() -> DataType { 36 | DataType::Struct(Self::fields().into()) 37 | } 38 | 39 | pub fn value(arr: &StructArray, index: usize) -> DFResult> { 40 | if arr.data_type() != &Box2d::data_type() { 41 | return internal_err!("StructArray data type is not matched"); 42 | } 43 | if index >= arr.len() || arr.is_null(index) { 44 | return Ok(None); 45 | } 46 | let scalar = ScalarValue::Struct(Arc::new(arr.slice(index, 1))); 47 | let box2d: Box2d = (&scalar).try_into()?; 48 | Ok(Some(box2d)) 49 | } 50 | } 51 | 52 | impl Default for Box2d { 53 | fn default() -> Self { 54 | Self::new() 55 | } 56 | } 57 | 58 | impl TryFrom<&ScalarValue> for Box2d { 59 | type Error = DataFusionError; 60 | 61 | fn try_from(value: &ScalarValue) -> Result { 62 | if let ScalarValue::Struct(arr) = value { 63 | if arr.data_type() != &Box2d::data_type() { 64 | return internal_err!("ScalarValue data type is not matched"); 65 | } 66 | let xmin = arr.column(0).as_primitive::().value(0); 67 | let ymin = arr.column(1).as_primitive::().value(0); 68 | let xmax = arr.column(2).as_primitive::().value(0); 69 | let ymax = arr.column(3).as_primitive::().value(0); 70 | Ok(Box2d { 71 | xmin, 72 | ymin, 73 | xmax, 74 | ymax, 75 | }) 76 | } else { 77 | internal_err!("ScalarValue is not struct") 78 | } 79 | } 80 | } 81 | 82 | impl From for ScalarValue { 83 | fn from(value: Box2d) -> Self { 84 | let arr = build_box2d_array(vec![Some(value)]); 85 | ScalarValue::Struct(Arc::new(arr)) 86 | } 87 | } 88 | 89 | impl From for Box2d { 90 | fn from(value: geo::Rect) -> Self { 91 | Self { 92 | xmin: value.min().x, 93 | ymin: value.min().y, 94 | xmax: 
value.max().x, 95 | ymax: value.max().y, 96 | } 97 | } 98 | } 99 | 100 | #[cfg(feature = "geos")] 101 | impl TryFrom> for Box2d { 102 | type Error = DataFusionError; 103 | 104 | fn try_from(value: geos::Geometry) -> Result { 105 | use datafusion_common::internal_datafusion_err; 106 | use geos::Geom; 107 | let xmin = value 108 | .get_x_min() 109 | .map_err(|_| internal_datafusion_err!("geom get_x_min failed"))?; 110 | let ymin = value 111 | .get_y_min() 112 | .map_err(|_| internal_datafusion_err!("geom get_y_min failed"))?; 113 | let xmax = value 114 | .get_x_max() 115 | .map_err(|_| internal_datafusion_err!("geom get_x_max failed"))?; 116 | let ymax = value 117 | .get_y_max() 118 | .map_err(|_| internal_datafusion_err!("geom get_y_max failed"))?; 119 | Ok(Box2d { 120 | xmin, 121 | ymin, 122 | xmax, 123 | ymax, 124 | }) 125 | } 126 | } 127 | 128 | pub fn build_box2d_array(data: Vec>) -> StructArray { 129 | let xmin_arr = Arc::new(Float64Array::from( 130 | data.iter() 131 | .map(|b| b.clone().map(|b| b.xmin)) 132 | .collect::>(), 133 | )); 134 | let ymin_arr = Arc::new(Float64Array::from( 135 | data.iter() 136 | .map(|b| b.clone().map(|b| b.ymin)) 137 | .collect::>(), 138 | )); 139 | let xmax_arr = Arc::new(Float64Array::from( 140 | data.iter() 141 | .map(|b| b.clone().map(|b| b.xmax)) 142 | .collect::>(), 143 | )); 144 | let ymax_arr = Arc::new(Float64Array::from( 145 | data.iter() 146 | .map(|b| b.clone().map(|b| b.ymax)) 147 | .collect::>(), 148 | )); 149 | let nulls: NullBuffer = data.iter().map(|b| b.is_some()).collect::>().into(); 150 | StructArray::try_new( 151 | Box2d::fields().into(), 152 | vec![xmin_arr, ymin_arr, xmax_arr, ymax_arr], 153 | Some(nulls), 154 | ) 155 | .expect("data is valid") 156 | } 157 | 158 | #[cfg(test)] 159 | mod tests { 160 | use crate::geo::r#box::{build_box2d_array, Box2d}; 161 | use arrow_array::{Array, StructArray}; 162 | 163 | #[test] 164 | fn box2d_array() { 165 | let box2d0 = Box2d { 166 | xmin: 1.0, 167 | ymin: 2.0, 168 | xmax: 
3.0, 169 | ymax: 4.0, 170 | }; 171 | let box2d2 = Box2d { 172 | xmin: 5.0, 173 | ymin: 6.0, 174 | xmax: 7.0, 175 | ymax: 8.0, 176 | }; 177 | let arr: StructArray = 178 | build_box2d_array(vec![Some(box2d0.clone()), None, Some(box2d2.clone())]); 179 | assert_eq!(arr.len(), 3); 180 | 181 | assert_eq!( 182 | format!("{:?}", Box2d::value(&arr, 0).unwrap()), 183 | "Some(Box2d { xmin: 1.0, ymin: 2.0, xmax: 3.0, ymax: 4.0 })" 184 | ); 185 | assert_eq!(format!("{:?}", Box2d::value(&arr, 1).unwrap()), "None"); 186 | assert_eq!( 187 | format!("{:?}", Box2d::value(&arr, 2).unwrap()), 188 | "Some(Box2d { xmin: 5.0, ymin: 6.0, xmax: 7.0, ymax: 8.0 })" 189 | ); 190 | assert_eq!(format!("{:?}", Box2d::value(&arr, 3).unwrap()), "None"); 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/geo/builder.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::dialect::wkb_type_id; 2 | use crate::DFResult; 3 | use arrow_array::builder::UInt8BufferBuilder; 4 | use arrow_array::types::GenericBinaryType; 5 | use arrow_array::{GenericByteArray, OffsetSizeTrait}; 6 | use arrow_buffer::{BufferBuilder, NullBufferBuilder, OffsetBuffer}; 7 | use datafusion_common::{internal_datafusion_err, DataFusionError}; 8 | use geozero::wkb::{FromWkb, WkbDialect}; 9 | use geozero::{GeozeroGeometry, ToWkb}; 10 | 11 | pub struct GeometryArrayBuilder { 12 | dialect: WkbDialect, 13 | value_builder: UInt8BufferBuilder, 14 | offsets_builder: BufferBuilder, 15 | null_buffer_builder: NullBufferBuilder, 16 | } 17 | 18 | impl GeometryArrayBuilder { 19 | pub fn new(dialect: WkbDialect, capacity: usize) -> Self { 20 | let mut offsets_builder = BufferBuilder::::new(capacity + 1); 21 | offsets_builder.append(O::from_usize(0).unwrap()); 22 | 23 | Self { 24 | dialect, 25 | value_builder: UInt8BufferBuilder::new(capacity), 26 | offsets_builder, 27 | null_buffer_builder: NullBufferBuilder::new(capacity), 28 | } 29 | } 30 | 
31 | #[inline] 32 | pub fn append_wkb(&mut self, wkb: Option<&[u8]>) -> DFResult<()> { 33 | if let Some(wkb) = wkb { 34 | check_wkb(wkb, self.dialect)?; 35 | self.internal_append_wkb(wkb); 36 | } else { 37 | self.append_null(); 38 | } 39 | Ok(()) 40 | } 41 | 42 | #[inline] 43 | pub fn append_geo_geometry(&mut self, geom: &Option) -> DFResult<()> { 44 | if let Some(geom) = geom { 45 | let wkb = geom 46 | .to_wkb_dialect(self.dialect, geom.dims(), geom.srid(), vec![]) 47 | .map_err(|e| internal_datafusion_err!("Failed to convert to wkb, error: {}", e))?; 48 | self.internal_append_wkb(&wkb); 49 | } else { 50 | self.append_null(); 51 | } 52 | Ok(()) 53 | } 54 | 55 | #[cfg(feature = "geos")] 56 | #[inline] 57 | pub fn append_geos_geometry(&mut self, geom: &Option) -> DFResult<()> { 58 | if let Some(geom) = geom { 59 | let wkb = geom 60 | .to_wkb_dialect(self.dialect, geom.dims(), geom.srid(), vec![]) 61 | .map_err(|e| internal_datafusion_err!("Failed to convert to wkb, error: {}", e))?; 62 | self.internal_append_wkb(&wkb); 63 | } else { 64 | self.append_null(); 65 | } 66 | Ok(()) 67 | } 68 | 69 | #[inline] 70 | pub fn append_null(&mut self) { 71 | self.null_buffer_builder.append_null(); 72 | self.offsets_builder.append(self.next_offset()); 73 | } 74 | 75 | fn internal_append_wkb(&mut self, wkb: &[u8]) { 76 | let mut bytes = vec![wkb_type_id(self.dialect)]; 77 | bytes.extend_from_slice(wkb); 78 | self.value_builder.append_slice(&bytes); 79 | self.null_buffer_builder.append(true); 80 | self.offsets_builder.append(self.next_offset()); 81 | } 82 | 83 | #[inline] 84 | fn next_offset(&self) -> O { 85 | O::from_usize(self.value_builder.len()).expect("array offset overflow") 86 | } 87 | 88 | pub fn build(mut self) -> GenericByteArray> { 89 | GenericByteArray::new( 90 | OffsetBuffer::::new(self.offsets_builder.finish().into()), 91 | self.value_builder.finish(), 92 | self.null_buffer_builder.finish(), 93 | ) 94 | } 95 | } 96 | 97 | fn check_wkb(wkb: &[u8], dialect: WkbDialect) -> 
DFResult<()> { 98 | let mut rdr = std::io::Cursor::new(wkb); 99 | #[cfg(feature = "geos")] 100 | { 101 | let _ = geos::Geometry::from_wkb(&mut rdr, dialect) 102 | .map_err(|e| internal_datafusion_err!("Failed to parse wkb, error: {}", e))?; 103 | } 104 | #[cfg(not(feature = "geos"))] 105 | { 106 | let _ = geo::Geometry::from_wkb(&mut rdr, dialect) 107 | .map_err(|e| internal_datafusion_err!("Failed to parse wkb, error: {}", e))?; 108 | } 109 | Ok(()) 110 | } 111 | 112 | impl From<&[Option]> for GeometryArrayBuilder { 113 | fn from(value: &[Option]) -> Self { 114 | let mut builder = GeometryArrayBuilder::::new(WkbDialect::Ewkb, value.len()); 115 | for geom in value { 116 | builder 117 | .append_geo_geometry(geom) 118 | .expect("geometry data is valid"); 119 | } 120 | builder 121 | } 122 | } 123 | 124 | impl From<&[Option]> for GeometryArrayBuilder { 125 | fn from(value: &[Option]) -> Self { 126 | let geo_vec = value 127 | .iter() 128 | .map(|p| p.map(geo::Geometry::Point)) 129 | .collect::>(); 130 | geo_vec.as_slice().into() 131 | } 132 | } 133 | 134 | impl From<&[Option]> for GeometryArrayBuilder { 135 | fn from(value: &[Option]) -> Self { 136 | let geo_vec = value 137 | .iter() 138 | .map(|ls| ls.clone().map(geo::Geometry::LineString)) 139 | .collect::>(); 140 | geo_vec.as_slice().into() 141 | } 142 | } 143 | 144 | impl From<&[Option]> for GeometryArrayBuilder { 145 | fn from(value: &[Option]) -> Self { 146 | let geo_vec = value 147 | .iter() 148 | .map(|p| p.clone().map(geo::Geometry::Polygon)) 149 | .collect::>(); 150 | geo_vec.as_slice().into() 151 | } 152 | } 153 | 154 | impl From<&[Option]> for GeometryArrayBuilder { 155 | fn from(value: &[Option]) -> Self { 156 | let geo_vec = value 157 | .iter() 158 | .map(|mp| mp.clone().map(geo::Geometry::MultiPoint)) 159 | .collect::>(); 160 | geo_vec.as_slice().into() 161 | } 162 | } 163 | 164 | impl From<&[Option]> for GeometryArrayBuilder { 165 | fn from(value: &[Option]) -> Self { 166 | let geo_vec = value 167 | 
.iter() 168 | .map(|ml| ml.clone().map(geo::Geometry::MultiLineString)) 169 | .collect::>(); 170 | geo_vec.as_slice().into() 171 | } 172 | } 173 | 174 | impl From<&[Option]> for GeometryArrayBuilder { 175 | fn from(value: &[Option]) -> Self { 176 | let geo_vec = value 177 | .iter() 178 | .map(|mp| mp.clone().map(geo::Geometry::MultiPolygon)) 179 | .collect::>(); 180 | geo_vec.as_slice().into() 181 | } 182 | } 183 | 184 | impl From<&[Option]> for GeometryArrayBuilder { 185 | fn from(value: &[Option]) -> Self { 186 | let geo_vec = value 187 | .iter() 188 | .map(|gc| gc.clone().map(geo::Geometry::GeometryCollection)) 189 | .collect::>(); 190 | geo_vec.as_slice().into() 191 | } 192 | } 193 | 194 | #[cfg(feature = "geos")] 195 | impl From<&[Option>]> for GeometryArrayBuilder { 196 | fn from(value: &[Option]) -> Self { 197 | let mut builder = GeometryArrayBuilder::::new(WkbDialect::Ewkb, value.len()); 198 | for geom in value { 199 | builder 200 | .append_geos_geometry(geom) 201 | .expect("geometry data is valid"); 202 | } 203 | builder 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /src/geo/dialect.rs: -------------------------------------------------------------------------------- 1 | use crate::DFResult; 2 | use datafusion_common::{internal_err, DataFusionError}; 3 | use geozero::wkb::WkbDialect; 4 | 5 | pub(crate) fn wkb_type_id(dialect: WkbDialect) -> u8 { 6 | match dialect { 7 | WkbDialect::Wkb => 1, 8 | WkbDialect::Ewkb => 2, 9 | WkbDialect::Geopackage => 3, 10 | WkbDialect::MySQL => 4, 11 | WkbDialect::SpatiaLite => 5, 12 | } 13 | } 14 | 15 | pub(crate) fn decode_wkb_dialect(type_id: u8) -> DFResult { 16 | if type_id == wkb_type_id(WkbDialect::Wkb) { 17 | Ok(WkbDialect::Wkb) 18 | } else if type_id == wkb_type_id(WkbDialect::Ewkb) { 19 | Ok(WkbDialect::Ewkb) 20 | } else if type_id == wkb_type_id(WkbDialect::Geopackage) { 21 | Ok(WkbDialect::Geopackage) 22 | } else if type_id == wkb_type_id(WkbDialect::MySQL) { 
23 | Ok(WkbDialect::MySQL) 24 | } else if type_id == wkb_type_id(WkbDialect::SpatiaLite) { 25 | Ok(WkbDialect::SpatiaLite) 26 | } else { 27 | internal_err!("Cannot decode WkbDialect from {}", type_id) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/geo/index.rs: -------------------------------------------------------------------------------- 1 | use crate::geo::{Box2d, GeometryArray}; 2 | use crate::DFResult; 3 | use arrow_array::{GenericBinaryArray, OffsetSizeTrait}; 4 | use geo::BoundingRect; 5 | use rstar::{RTree, RTreeObject, AABB}; 6 | 7 | #[derive(Clone, Debug)] 8 | pub struct GeoGeometry(geo::Geometry); 9 | 10 | impl RTreeObject for GeoGeometry { 11 | type Envelope = AABB<[f64; 2]>; 12 | 13 | fn envelope(&self) -> Self::Envelope { 14 | let box2d: Box2d = if let Some(rect) = self.0.bounding_rect() { 15 | rect.into() 16 | } else { 17 | Box2d::new() 18 | }; 19 | AABB::from_corners([box2d.xmin, box2d.ymin], [box2d.xmax, box2d.ymax]) 20 | } 21 | } 22 | 23 | pub fn build_rtree_index( 24 | wkb_arr: GenericBinaryArray, 25 | ) -> DFResult> { 26 | let mut geom_vec = vec![]; 27 | for i in 0..wkb_arr.geom_len() { 28 | if let Some(geom) = wkb_arr.geo_value(i)? { 29 | geom_vec.push(GeoGeometry(geom)); 30 | } 31 | } 32 | Ok(RTree::bulk_load(geom_vec)) 33 | } 34 | 35 | #[cfg(test)] 36 | mod tests { 37 | use crate::geo::index::build_rtree_index; 38 | use crate::geo::GeometryArrayBuilder; 39 | use geo::line_string; 40 | use rstar::AABB; 41 | 42 | #[test] 43 | fn rtree_index() { 44 | let ls0 = line_string![ 45 | (x: 0., y: 0.), 46 | (x: 1., y: 1.) 47 | ]; 48 | let ls2 = line_string![ 49 | (x: 0., y: 0.), 50 | (x: -1., y: -1.) 
51 | ]; 52 | let builder: GeometryArrayBuilder = vec![Some(ls0), None, Some(ls2)].as_slice().into(); 53 | let wkb_arr = builder.build(); 54 | 55 | let index = build_rtree_index(wkb_arr).unwrap(); 56 | 57 | let elements = index.locate_in_envelope(&AABB::from_corners([0., 0.], [0.5, 0.5])); 58 | assert_eq!(elements.count(), 0); 59 | let elements = index.locate_in_envelope(&AABB::from_corners([0., 0.], [1., 1.])); 60 | assert_eq!(elements.count(), 1); 61 | let elements = index.locate_in_envelope(&AABB::from_corners([-1., -1.], [1., 1.])); 62 | assert_eq!(elements.count(), 2); 63 | let elements = index.locate_in_envelope(&AABB::from_corners([-2., -2.], [2., 2.])); 64 | assert_eq!(elements.count(), 2); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/geo/mod.rs: -------------------------------------------------------------------------------- 1 | mod array; 2 | mod r#box; 3 | mod builder; 4 | pub(crate) mod dialect; 5 | mod index; 6 | 7 | pub use array::*; 8 | pub use builder::*; 9 | pub use index::*; 10 | pub use r#box::*; 11 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod function; 2 | pub mod geo; 3 | 4 | pub type DFResult = datafusion_common::Result; 5 | --------------------------------------------------------------------------------