├── .gitignore ├── Cargo.toml ├── src ├── types │ ├── any.rs │ ├── generic.rs │ ├── null.rs │ ├── empty_array.rs │ ├── boolean.rs │ ├── string.rs │ ├── array.rs │ ├── nullable.rs │ └── number.rs ├── property.rs ├── expr.rs ├── util.rs ├── types.rs ├── display.rs ├── runtime.rs ├── type_check.rs ├── main.rs ├── function.rs └── values.rs ├── LICENSE ├── README.md ├── tests └── run-output └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode 3 | 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "typed-type-exercise" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | educe = "0.4" 10 | enum-as-inner = "0.4" 11 | goldenfile = "1.4.3" 12 | arrow2 = "0.12" 13 | -------------------------------------------------------------------------------- /src/types/any.rs: -------------------------------------------------------------------------------- 1 | use crate::values::{Column, Scalar}; 2 | 3 | use super::ValueType; 4 | 5 | pub struct AnyType; 6 | 7 | impl ValueType for AnyType { 8 | type Scalar = Scalar; 9 | type ScalarRef<'a> = &'a Scalar; 10 | type Column = Column; 11 | 12 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 13 | scalar.clone() 14 | } 15 | 16 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 17 | scalar 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/property.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Default, Clone, Copy)] 2 | pub struct ValueProperty { 3 | pub not_null: bool, 4 | } 5 | 6 | #[derive(Debug, Default, Clone, Copy)] 7 | pub struct 
FunctionProperty { 8 | pub preserve_not_null: bool, 9 | pub commutative: bool, 10 | // pub injectivity: bool, 11 | } 12 | 13 | impl ValueProperty { 14 | pub fn not_null(mut self, not_null: bool) -> Self { 15 | self.not_null = not_null; 16 | self 17 | } 18 | } 19 | 20 | impl FunctionProperty { 21 | pub fn preserve_not_null(mut self, preserve_not_null: bool) -> Self { 22 | self.preserve_not_null = preserve_not_null; 23 | self 24 | } 25 | 26 | pub fn commutative(mut self, commutative: bool) -> Self { 27 | self.commutative = commutative; 28 | self 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/expr.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::{ 4 | function::{Function, FunctionID}, 5 | property::ValueProperty, 6 | types::DataType, 7 | }; 8 | 9 | #[derive(Debug, Clone)] 10 | pub enum AST { 11 | Literal(Literal), 12 | ColumnRef { 13 | name: String, 14 | data_type: DataType, 15 | property: ValueProperty, 16 | }, 17 | FunctionCall { 18 | name: String, 19 | params: Vec, 20 | args: Vec, 21 | }, 22 | } 23 | 24 | #[derive(Debug, Clone)] 25 | pub enum Expr { 26 | Literal(Literal), 27 | ColumnRef { 28 | name: String, 29 | }, 30 | Cast { 31 | expr: Box, 32 | dest_type: DataType, 33 | }, 34 | FunctionCall { 35 | id: FunctionID, 36 | function: Arc, 37 | generics: Vec, 38 | args: Vec<(Expr, ValueProperty)>, 39 | }, 40 | } 41 | 42 | #[derive(Debug, Clone)] 43 | pub enum Literal { 44 | Null, 45 | Int8(i8), 46 | Int16(i16), 47 | UInt8(u8), 48 | UInt16(u16), 49 | Boolean(bool), 50 | String(Vec), 51 | } 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ashish's Web / kcak11.com / ashishkumarkc.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 
| of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use arrow2::{ 2 | bitmap::{Bitmap, MutableBitmap}, 3 | buffer::Buffer, 4 | types::NativeType, 5 | }; 6 | 7 | pub fn bitmap_into_mut(bitmap: Bitmap) -> MutableBitmap { 8 | bitmap 9 | .into_mut() 10 | .map_left(|bitmap| { 11 | let mut builder = MutableBitmap::new(); 12 | builder.extend_from_bitmap(&bitmap); 13 | builder 14 | }) 15 | .into_inner() 16 | } 17 | 18 | pub fn repeat_bitmap(bitmap: &mut Bitmap, n: usize) -> MutableBitmap { 19 | let mut builder = MutableBitmap::new(); 20 | for _ in 0..n { 21 | builder.extend_from_bitmap(bitmap); 22 | } 23 | builder 24 | } 25 | 26 | pub fn append_bitmap(bitmap: &mut MutableBitmap, other: &MutableBitmap) { 27 | bitmap.extend_from_slice(other.as_slice(), 0, other.len()); 28 | } 29 | 30 | pub fn constant_bitmap(value: bool, len: usize) -> MutableBitmap { 31 | let mut builder = MutableBitmap::new(); 32 | builder.extend_constant(len, value); 33 | builder 34 | } 35 | 36 | pub fn buffer_into_mut(buffer: Buffer) -> Vec { 37 | buffer 38 | .into_mut() 39 | .map_left(|buffer| buffer.to_vec()) 40 | .into_inner() 41 | } 42 | -------------------------------------------------------------------------------- /src/types/generic.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | 3 | use crate::values::{Column, ColumnBuilder, ColumnIterator, Scalar, ScalarRef}; 4 | 5 | use super::{ArgType, DataType, GenericMap, ValueType}; 6 | 7 | pub struct GenericType; 8 | 9 | impl ValueType for GenericType { 10 | type Scalar = Scalar; 11 | type ScalarRef<'a> = ScalarRef<'a>; 12 | type Column = Column; 13 | 14 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 15 | scalar.to_owned() 16 | } 17 | 18 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 19 | scalar.as_ref() 20 | 
} 21 | } 22 | 23 | impl ArgType for GenericType { 24 | type ColumnIterator<'a> = ColumnIterator<'a>; 25 | type ColumnBuilder = ColumnBuilder; 26 | 27 | fn data_type() -> DataType { 28 | DataType::Generic(INDEX) 29 | } 30 | 31 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 32 | Some(scalar.as_ref()) 33 | } 34 | 35 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 36 | Some(col.clone()) 37 | } 38 | 39 | fn upcast_scalar(scalar: Self::Scalar) -> Scalar { 40 | scalar 41 | } 42 | 43 | fn upcast_column(col: Self::Column) -> Column { 44 | col 45 | } 46 | 47 | fn column_len<'a>(col: &'a Self::Column) -> usize { 48 | col.len() 49 | } 50 | 51 | fn index_column<'a>(col: &'a Self::Column, index: usize) -> Self::ScalarRef<'a> { 52 | col.index(index) 53 | } 54 | 55 | fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { 56 | col.slice(range) 57 | } 58 | 59 | fn iter_column<'a>(col: &'a Self::Column) -> Self::ColumnIterator<'a> { 60 | col.iter() 61 | } 62 | 63 | fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { 64 | ColumnBuilder::with_capacity(&generics[INDEX], capacity) 65 | } 66 | 67 | fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder { 68 | ColumnBuilder::from_column(col) 69 | } 70 | 71 | fn builder_len(builder: &Self::ColumnBuilder) -> usize { 72 | builder.len() 73 | } 74 | 75 | fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { 76 | builder.push(item); 77 | } 78 | 79 | fn push_default(builder: &mut Self::ColumnBuilder) { 80 | builder.push_default(); 81 | } 82 | 83 | fn append_builder(builder: &mut Self::ColumnBuilder, other: &Self::ColumnBuilder) { 84 | builder.append(other); 85 | } 86 | 87 | fn build_column(builder: Self::ColumnBuilder) -> Self::Column { 88 | builder.build() 89 | } 90 | 91 | fn build_scalar(builder: Self::ColumnBuilder) -> Self::Scalar { 92 | builder.build_scalar() 93 | } 94 | } 95 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Typed Type Exercise in Rust 2 | 3 | Build a database expression type checker and a vectorized runtime executor in type-safe Rust. 4 | 5 | > This project is highly inspired by [@skyzh](https://github.com/skyzh)'s [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust). While adopting his idea in [Databend](https://github.com/datafuselabs/databend), I also implemented a few features that I think are useful: 6 | 7 | 1. **Type checking**. The type checker can catch all type errors in the SQL compilation phase with a set of carefully defined typing rules. The type checker outputs a totally untyped expression that is ready for runtime execution. So this makes the runtime free of any type information. 8 | 9 | 2. **Type-safe downcast**. Function authors no longer have to worry about downcasting runtime inputs. Thanks to Rust's type system, so long as your function compiles, the downcast is always successful. 10 | 11 | 3. **Enum-dispatched columns**. Use enums to exhaustively enumerate all column types and scalar types. This should further minimize runtime overhead and mental effort compared to a `dyn`-dispatched strategy. 12 | 13 | 4. **Generic types**. Use generics in the function signature to reduce the number of hand-written overloads. For example, you can express `get(arr: Array<T0>, idx: i64) -> T0` in the type system. 
14 | 15 | ## Snippet of code 16 | 17 | Define a fast, type-safe, auto-downcasting and vectorized binary function in several lines of code: 18 | 19 | ```rust 20 | registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _>( 21 | "and", 22 | FunctionProperty::default(), 23 | |lhs, rhs| lhs && rhs, 24 | ); 25 | ``` 26 | 27 | Define a generic function `get` which returns an item of an array by the index: 28 | 29 | ```rust 30 | registry.register_with_writer_2_arg::<ArrayType<GenericType<0>>, Int16Type, GenericType<0>, _>( 31 | "get", 32 | FunctionProperty::default(), 33 | |array, idx, output| output.push(array.index(idx as usize)), 34 | ); 35 | ``` 36 | 37 | ## Run 38 | 39 | ``` 40 | cargo run 41 | ``` 42 | 43 | ## Things to do 44 | 45 | - [x] Automatically generate the nullable function. 46 | - [x] Automatically generate the Null function. 47 | - [ ] Automatically dispatch arithmetic types. 48 | - [x] Implement arrays. 49 | - [x] Implement column builder. 50 | - [x] Implement unlimited-length tuples. 51 | - [x] Implement generic functions. 52 | - [x] Implement function properties. 53 | - [x] Implement variadic functions. 54 | - [ ] Implement sparse columns (some of the rows in a column are hidden). 55 | - [ ] Check ambiguity between function overloads. 56 | - [ ] Read material for the project. 
57 | 58 | ## Reading material 59 | 60 | - [Databend/RFC: Formal Type System](https://github.com/datafuselabs/databend/discussions/5438) 61 | - [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust) 62 | - [数据库表达式执行的黑魔法:用 Rust 做类型体操](https://zhuanlan.zhihu.com/p/460702914) 63 | - [Book: Types and Programming Languages](https://www.amazon.com/Types-Programming-Languages-MIT-Press/dp/0262162091) 64 | - [Paper: Type inference with simple subtypes](https://www.cambridge.org/core/services/aop-cambridge-core/content/view/S0956796800000113) 65 | -------------------------------------------------------------------------------- /src/types/null.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | 3 | use crate::values::{Column, Scalar}; 4 | 5 | use super::{ArgType, DataType, GenericMap, ValueType}; 6 | 7 | pub struct NullType; 8 | 9 | impl ValueType for NullType { 10 | type Scalar = (); 11 | type ScalarRef<'a> = (); 12 | type Column = usize; 13 | 14 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 15 | scalar 16 | } 17 | 18 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 19 | *scalar 20 | } 21 | } 22 | 23 | impl ArgType for NullType { 24 | type ColumnIterator<'a> = std::iter::Take>; 25 | type ColumnBuilder = usize; 26 | 27 | fn data_type() -> DataType { 28 | DataType::Null 29 | } 30 | 31 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 32 | match scalar { 33 | Scalar::Null => Some(()), 34 | _ => None, 35 | } 36 | } 37 | 38 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 39 | match col { 40 | Column::Null { len } => Some(*len), 41 | _ => None, 42 | } 43 | } 44 | 45 | fn upcast_scalar(_: Self::Scalar) -> Scalar { 46 | Scalar::Null 47 | } 48 | 49 | fn upcast_column(len: Self::Column) -> Column { 50 | Column::Null { len } 51 | } 52 | 53 | fn column_len<'a>(len: &'a Self::Column) -> usize { 54 | *len 55 | } 56 | 57 | fn 
index_column<'a>(len: &'a Self::Column, index: usize) -> Self::ScalarRef<'a> { 58 | if index >= *len { 59 | panic!("index {index} out of 0..{len}"); 60 | } 61 | } 62 | 63 | fn slice_column<'a>(len: &'a Self::Column, range: Range) -> Self::Column { 64 | if range.end <= *len { 65 | range.end - range.start 66 | } else { 67 | panic!("range {range:?} out of 0..{len}"); 68 | } 69 | } 70 | 71 | fn iter_column<'a>(len: &'a Self::Column) -> Self::ColumnIterator<'a> { 72 | std::iter::repeat(()).take(*len) 73 | } 74 | 75 | fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { 76 | iter.count() 77 | } 78 | 79 | fn create_builder(_capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { 80 | 0 81 | } 82 | 83 | fn column_to_builder(len: Self::Column) -> Self::ColumnBuilder { 84 | len 85 | } 86 | 87 | fn builder_len(len: &Self::ColumnBuilder) -> usize { 88 | *len 89 | } 90 | 91 | fn push_item(len: &mut Self::ColumnBuilder, _item: Self::Scalar) { 92 | *len += 1 93 | } 94 | 95 | fn push_default(len: &mut Self::ColumnBuilder) { 96 | *len += 1 97 | } 98 | 99 | fn append_builder(len: &mut Self::ColumnBuilder, other_len: &Self::ColumnBuilder) { 100 | *len += other_len 101 | } 102 | 103 | fn build_column(len: Self::ColumnBuilder) -> Self::Column { 104 | len 105 | } 106 | 107 | fn build_scalar(len: Self::ColumnBuilder) -> Self::Scalar { 108 | assert_eq!(len, 1); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/types/empty_array.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | 3 | use crate::values::{Column, Scalar}; 4 | 5 | use super::{ArgType, DataType, GenericMap, ValueType}; 6 | 7 | pub struct EmptyArrayType; 8 | 9 | impl ValueType for EmptyArrayType { 10 | type Scalar = (); 11 | type ScalarRef<'a> = (); 12 | type Column = usize; 13 | 14 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 15 | scalar 16 | } 17 | 
18 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 19 | *scalar 20 | } 21 | } 22 | 23 | impl ArgType for EmptyArrayType { 24 | type ColumnIterator<'a> = std::iter::Take>; 25 | type ColumnBuilder = usize; 26 | 27 | fn data_type() -> DataType { 28 | DataType::EmptyArray 29 | } 30 | 31 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 32 | match scalar { 33 | Scalar::EmptyArray => Some(()), 34 | _ => None, 35 | } 36 | } 37 | 38 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 39 | match col { 40 | Column::EmptyArray { len } => Some(*len), 41 | _ => None, 42 | } 43 | } 44 | 45 | fn upcast_scalar(_: Self::Scalar) -> Scalar { 46 | Scalar::EmptyArray 47 | } 48 | 49 | fn upcast_column(len: Self::Column) -> Column { 50 | Column::EmptyArray { len } 51 | } 52 | 53 | fn column_len<'a>(len: &'a Self::Column) -> usize { 54 | *len 55 | } 56 | 57 | fn index_column<'a>(len: &'a Self::Column, index: usize) -> Self::ScalarRef<'a> { 58 | if index >= *len { 59 | panic!("index {index} out of 0..{len}"); 60 | } 61 | } 62 | 63 | fn slice_column<'a>(len: &'a Self::Column, range: Range) -> Self::Column { 64 | if range.end <= *len { 65 | range.end - range.start 66 | } else { 67 | panic!("range {range:?} out of 0..{len}"); 68 | } 69 | } 70 | 71 | fn iter_column<'a>(len: &'a Self::Column) -> Self::ColumnIterator<'a> { 72 | std::iter::repeat(()).take(*len) 73 | } 74 | 75 | fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { 76 | iter.count() 77 | } 78 | 79 | fn create_builder(_capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { 80 | 0 81 | } 82 | 83 | fn column_to_builder(len: Self::Column) -> Self::ColumnBuilder { 84 | len 85 | } 86 | 87 | fn builder_len(len: &Self::ColumnBuilder) -> usize { 88 | *len 89 | } 90 | 91 | fn push_item(len: &mut Self::ColumnBuilder, _: Self::Scalar) { 92 | *len += 1 93 | } 94 | 95 | fn push_default(len: &mut Self::ColumnBuilder) { 96 | *len += 1 97 | } 98 | 99 | fn 
append_builder(len: &mut Self::ColumnBuilder, other_len: &Self::ColumnBuilder) { 100 | *len += other_len 101 | } 102 | 103 | fn build_column(len: Self::ColumnBuilder) -> Self::Column { 104 | len 105 | } 106 | 107 | fn build_scalar(len: Self::ColumnBuilder) -> Self::Scalar { 108 | assert_eq!(len, 1); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/types/boolean.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | 3 | use arrow2::bitmap::{Bitmap, MutableBitmap}; 4 | 5 | use crate::{ 6 | util::bitmap_into_mut, 7 | values::{Column, Scalar}, 8 | }; 9 | 10 | use super::{ArgType, DataType, GenericMap, ValueType}; 11 | 12 | pub struct BooleanType; 13 | 14 | impl ValueType for BooleanType { 15 | type Scalar = bool; 16 | type ScalarRef<'a> = bool; 17 | type Column = Bitmap; 18 | 19 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 20 | scalar 21 | } 22 | 23 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 24 | *scalar 25 | } 26 | } 27 | 28 | impl ArgType for BooleanType { 29 | type ColumnIterator<'a> = arrow2::bitmap::utils::BitmapIter<'a>; 30 | type ColumnBuilder = MutableBitmap; 31 | 32 | fn data_type() -> DataType { 33 | DataType::Boolean 34 | } 35 | 36 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 37 | match scalar { 38 | Scalar::Boolean(scalar) => Some(*scalar), 39 | _ => None, 40 | } 41 | } 42 | 43 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 44 | match col { 45 | Column::Boolean(column) => Some(column.clone()), 46 | _ => None, 47 | } 48 | } 49 | 50 | fn upcast_scalar(scalar: Self::Scalar) -> Scalar { 51 | Scalar::Boolean(scalar) 52 | } 53 | 54 | fn upcast_column(col: Self::Column) -> Column { 55 | Column::Boolean(col) 56 | } 57 | 58 | fn column_len<'a>(col: &'a Self::Column) -> usize { 59 | col.len() 60 | } 61 | 62 | fn index_column<'a>(col: &'a Self::Column, index: usize) 
-> Self::ScalarRef<'a> { 63 | col.get(index).unwrap() 64 | } 65 | 66 | fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { 67 | col.clone().slice(range.start, range.end - range.start) 68 | } 69 | 70 | fn iter_column<'a>(col: &'a Self::Column) -> Self::ColumnIterator<'a> { 71 | col.iter() 72 | } 73 | 74 | fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { 75 | iter.collect() 76 | } 77 | 78 | fn create_builder(capacity: usize, _: &GenericMap) -> Self::ColumnBuilder { 79 | MutableBitmap::with_capacity(capacity) 80 | } 81 | 82 | fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder { 83 | bitmap_into_mut(col) 84 | } 85 | 86 | fn builder_len(builder: &Self::ColumnBuilder) -> usize { 87 | builder.len() 88 | } 89 | 90 | fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { 91 | builder.push(item); 92 | } 93 | 94 | fn push_default(builder: &mut Self::ColumnBuilder) { 95 | builder.push(false); 96 | } 97 | 98 | fn append_builder(builder: &mut Self::ColumnBuilder, other_builder: &Self::ColumnBuilder) { 99 | builder.extend_from_slice(other_builder.as_slice(), 0, other_builder.len()); 100 | } 101 | 102 | fn build_column(builder: Self::ColumnBuilder) -> Self::Column { 103 | builder.into() 104 | } 105 | 106 | fn build_scalar(builder: Self::ColumnBuilder) -> Self::Scalar { 107 | assert_eq!(builder.len(), 1); 108 | builder.get(0) 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | pub mod any; 2 | pub mod array; 3 | pub mod boolean; 4 | pub mod empty_array; 5 | pub mod generic; 6 | pub mod null; 7 | pub mod nullable; 8 | pub mod number; 9 | pub mod string; 10 | 11 | pub use any::AnyType; 12 | pub use array::ArrayType; 13 | use arrow2::trusted_len::TrustedLen; 14 | pub use boolean::BooleanType; 15 | pub use empty_array::EmptyArrayType; 16 | pub use 
generic::GenericType; 17 | pub use null::NullType; 18 | pub use nullable::NullableType; 19 | pub use number::NumberType; 20 | pub use string::StringType; 21 | 22 | use std::{fmt::Debug, ops::Range}; 23 | 24 | use enum_as_inner::EnumAsInner; 25 | 26 | use crate::{ 27 | values::Scalar, 28 | values::{Column, Value, ValueRef}, 29 | }; 30 | 31 | pub type GenericMap<'a> = [DataType]; 32 | 33 | #[derive(Debug, Clone, PartialEq, Eq, EnumAsInner)] 34 | pub enum DataType { 35 | Boolean, 36 | String, 37 | UInt8, 38 | UInt16, 39 | Int8, 40 | Int16, 41 | Null, 42 | Nullable(Box), 43 | EmptyArray, 44 | Array(Box), 45 | Tuple(Vec), 46 | Generic(usize), 47 | } 48 | 49 | pub trait ValueType: Sized + 'static { 50 | type Scalar: Debug + Clone; 51 | type ScalarRef<'a>: Debug + Clone; 52 | type Column: Debug + Clone; 53 | 54 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar; 55 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a>; 56 | } 57 | 58 | pub trait ArgType: ValueType { 59 | type ColumnIterator<'a>: Iterator> + TrustedLen; 60 | type ColumnBuilder; 61 | 62 | fn data_type() -> DataType; 63 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option>; 64 | fn try_downcast_column<'a>(col: &'a Column) -> Option; 65 | fn upcast_scalar(scalar: Self::Scalar) -> Scalar; 66 | fn upcast_column(col: Self::Column) -> Column; 67 | fn try_downcast_value<'a>(value: &'a ValueRef<'_, AnyType>) -> Option> { 68 | Some(match value { 69 | ValueRef::Scalar(scalar) => ValueRef::Scalar(Self::try_downcast_scalar(scalar)?), 70 | ValueRef::Column(col) => ValueRef::Column(Self::try_downcast_column(col)?), 71 | }) 72 | } 73 | fn upcast_value(value: Value) -> Value { 74 | match value { 75 | Value::Scalar(scalar) => Value::Scalar(Self::upcast_scalar(scalar)), 76 | Value::Column(col) => Value::Column(Self::upcast_column(col)), 77 | } 78 | } 79 | 80 | fn column_len<'a>(col: &'a Self::Column) -> usize; 81 | fn index_column<'a>(col: &'a Self::Column, index: usize) -> 
Self::ScalarRef<'a>; 82 | fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column; 83 | fn iter_column<'a>(col: &'a Self::Column) -> Self::ColumnIterator<'a>; 84 | fn column_from_iter( 85 | iter: impl Iterator, 86 | generics: &GenericMap, 87 | ) -> Self::Column { 88 | let mut col = Self::create_builder(iter.size_hint().0, generics); 89 | for item in iter { 90 | Self::push_item(&mut col, Self::to_scalar_ref(&item)); 91 | } 92 | Self::build_column(col) 93 | } 94 | 95 | fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder; 96 | fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder; 97 | fn builder_len(builder: &Self::ColumnBuilder) -> usize; 98 | fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>); 99 | fn push_default(builder: &mut Self::ColumnBuilder); 100 | fn append_builder(builder: &mut Self::ColumnBuilder, other_builder: &Self::ColumnBuilder); 101 | fn build_column(builder: Self::ColumnBuilder) -> Self::Column; 102 | fn build_scalar(builder: Self::ColumnBuilder) -> Self::Scalar; 103 | } 104 | -------------------------------------------------------------------------------- /src/types/string.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | 3 | use arrow2::{buffer::Buffer, trusted_len::TrustedLen}; 4 | 5 | use crate::{ 6 | util::buffer_into_mut, 7 | values::{Column, Scalar}, 8 | }; 9 | 10 | use super::{ArgType, DataType, GenericMap, ValueType}; 11 | 12 | pub struct StringType; 13 | 14 | impl ValueType for StringType { 15 | type Scalar = Vec; 16 | type ScalarRef<'a> = &'a [u8]; 17 | type Column = (Buffer, Vec); 18 | 19 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 20 | scalar.to_vec() 21 | } 22 | 23 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 24 | scalar 25 | } 26 | } 27 | 28 | impl ArgType for StringType { 29 | type ColumnIterator<'a> = StringIterator<'a>; 30 | type 
ColumnBuilder = (Vec, Vec); 31 | 32 | fn data_type() -> DataType { 33 | DataType::String 34 | } 35 | 36 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 37 | scalar.as_string().map(Vec::as_slice) 38 | } 39 | 40 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 41 | col.as_string() 42 | .map(|(data, offsets)| (data.clone(), offsets.clone())) 43 | } 44 | 45 | fn upcast_scalar(scalar: Self::Scalar) -> Scalar { 46 | Scalar::String(scalar) 47 | } 48 | 49 | fn upcast_column((data, offsets): Self::Column) -> Column { 50 | Column::String { data, offsets } 51 | } 52 | 53 | fn column_len<'a>((_, offsets): &'a Self::Column) -> usize { 54 | offsets.len() - 1 55 | } 56 | 57 | fn index_column<'a>((data, offsets): &'a Self::Column, index: usize) -> Self::ScalarRef<'a> { 58 | &data[offsets[index]..offsets[index + 1]] 59 | } 60 | 61 | fn slice_column<'a>((data, offsets): &'a Self::Column, range: Range) -> Self::Column { 62 | let offsets = offsets[range.start..(range.end + 1)].to_vec(); 63 | (data.clone(), offsets) 64 | } 65 | 66 | fn iter_column<'a>((data, offsets): &'a Self::Column) -> Self::ColumnIterator<'a> { 67 | StringIterator { 68 | data, 69 | offsets: offsets.windows(2), 70 | } 71 | } 72 | 73 | fn create_builder(capacity: usize, _: &GenericMap) -> Self::ColumnBuilder { 74 | let mut offsets = Vec::with_capacity(capacity + 1); 75 | offsets.push(0); 76 | (Vec::new(), offsets) 77 | } 78 | 79 | fn column_to_builder((data, offsets): Self::Column) -> Self::ColumnBuilder { 80 | (buffer_into_mut(data), offsets) 81 | } 82 | 83 | fn builder_len((_, offsets): &Self::ColumnBuilder) -> usize { 84 | offsets.len() - 1 85 | } 86 | 87 | fn push_item((data, offsets): &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { 88 | data.extend_from_slice(item); 89 | offsets.push(data.len()); 90 | } 91 | 92 | fn push_default((data, offsets): &mut Self::ColumnBuilder) { 93 | offsets.push(data.len()); 94 | } 95 | 96 | fn append_builder( 97 | (data, offsets): &mut 
Self::ColumnBuilder, 98 | (other_data, other_offsets): &Self::ColumnBuilder, 99 | ) { 100 | data.extend_from_slice(other_data); 101 | let start = offsets.last().cloned().unwrap(); 102 | offsets.extend(other_offsets.iter().skip(1).map(|offset| start + offset)); 103 | } 104 | 105 | fn build_column((data, offsets): Self::ColumnBuilder) -> Self::Column { 106 | (data.into(), offsets) 107 | } 108 | /// Extracts the single string held by a one-row builder; panics if the builder does not hold exactly one row. 109 | fn build_scalar((data, offsets): Self::ColumnBuilder) -> Self::Scalar { 110 | // `data` holds the string's bytes and may have any length; only the two offsets prove there is exactly one row. 111 | assert_eq!(offsets.len(), 2); 112 | data[offsets[0]..offsets[1]].to_vec() 113 | } 114 | } 115 | 116 | pub struct StringIterator<'a> { 117 | data: &'a Buffer, 118 | offsets: std::slice::Windows<'a, usize>, 119 | } 120 | 121 | impl<'a> Iterator for StringIterator<'a> { 122 | type Item = &'a [u8]; 123 | 124 | fn next(&mut self) -> Option { 125 | self.offsets 126 | .next() 127 | .map(|range| &self.data[range[0]..range[1]]) 128 | } 129 | 130 | fn size_hint(&self) -> (usize, Option) { 131 | self.offsets.size_hint() 132 | } 133 | } 134 | 135 | unsafe impl<'a> TrustedLen for StringIterator<'a> {} 136 | -------------------------------------------------------------------------------- /tests/run-output: -------------------------------------------------------------------------------- 1 | ast: and(true::Boolean, false::Boolean) 2 | expr: and(true::Boolean{not_null}, false::Boolean{not_null}) 3 | type: Boolean 4 | property: {not_null} 5 | result: Boolean(false) 6 | 7 | ast: and(NULL, false::Boolean) 8 | expr: and, Boolean>(NULL{}, false::Boolean{not_null}) 9 | type: Nullable 10 | property: {} 11 | result: Null 12 | 13 | ast: plus(a::Nullable{}, -10::Int8) 14 | expr: plus, Nullable>(cast>(a){}, cast>(-10::Int8){not_null}) 15 | type: Nullable 16 | property: {} 17 | result: Nullable { column: Int16([0, 1, 2]), validity: [0b_____010] } 18 | 19 | ast: plus(a::Nullable{}, b::Nullable{}) 20 | expr: plus, Nullable>(cast>(a){}, cast>(b){}) 21 | type: Nullable 22 | property: {} 23 | 
result: Nullable { column: Int16([11, 13, 15]), validity: [0b_____110] } 24 | 25 | ast: not(a::Nullable{}) 26 | expr: not>(a{}) 27 | type: Nullable 28 | property: {} 29 | result: Nullable { column: Boolean([0b_____010]), validity: [0b_____010] } 30 | 31 | ast: least(10::UInt8, 20::UInt8, 30::UInt8, 40::UInt8) 32 | expr: least(cast(10::UInt8){not_null}, cast(20::UInt8){not_null}, cast(30::UInt8){not_null}, cast(40::UInt8){not_null}) 33 | type: Int16 34 | property: {not_null} 35 | result: Int16(10) 36 | 37 | ast: create_tuple(NULL, true::Boolean) 38 | expr: create_tuple, Boolean>(NULL{}, true::Boolean{not_null}) 39 | type: (Nullable, Boolean) 40 | property: {not_null} 41 | result: Tuple([Null, Boolean(true)]) 42 | 43 | ast: get_tuple(1)(create_tuple(a::Int16{not_null}, b::Nullable{})) 44 | expr: get_tuple<(Int16, Nullable)>(create_tuple>(a{not_null}, b{}){not_null}) 45 | type: Nullable 46 | property: {not_null} 47 | result: Nullable { column: String { data: [97, 98, 99, 100, 101], offsets: [0, 1, 2, 3, 4, 5] }, validity: [0b___00011] } 48 | 49 | ast: get_tuple(1)(a::Nullable<(Boolean, String)>{not_null}) 50 | expr: get_tuple>(a{not_null}) 51 | type: Nullable 52 | property: {not_null} 53 | result: Nullable { column: String { data: [97, 98, 99, 100, 101], offsets: [0, 1, 2, 3, 4, 5] }, validity: [0b___00011] } 54 | 55 | ast: create_array() 56 | expr: create_array<>() 57 | type: Array 58 | property: {not_null} 59 | result: EmptyArray 60 | 61 | ast: create_array(NULL, true::Boolean) 62 | expr: create_array>(cast>(NULL){}, cast>(true::Boolean){not_null}) 63 | type: Array> 64 | property: {not_null} 65 | result: Array(Nullable { column: Boolean([0b______10]), validity: [0b______10] }) 66 | 67 | ast: create_array(a::Int16{not_null}, b::Int16{not_null}) 68 | expr: create_array(a{not_null}, b{not_null}) 69 | type: Array 70 | property: {not_null} 71 | result: Array { array: Int16([0, 5, 1, 6, 2, 7, 3, 8, 4, 9]), offsets: [0, 2, 4, 6, 8, 10] } 72 | 73 | ast: 
create_array(create_array(a::Int16{not_null}, b::Int16{not_null}), NULL, NULL) 74 | expr: create_array>>(cast>>(create_array(a{not_null}, b{not_null})){not_null}, cast>>(NULL){}, cast>>(NULL){}) 75 | type: Array>> 76 | property: {not_null} 77 | result: Array { array: Nullable { column: Array { array: Int16([0, 5, 1, 6, 2, 7, 3, 8, 4, 9]), offsets: [0, 2, 2, 2, 4, 4, 4, 6, 6, 6, 8, 8, 8, 10, 10, 10] }, validity: [0b01001001, 0b_0010010] }, offsets: [0, 3, 6, 9, 12, 15] } 78 | 79 | ast: get(array::Array{not_null}, idx::UInt8{not_null}) 80 | expr: get, Int16>(array{not_null}, cast(idx){not_null}) 81 | type: Int16 82 | property: {not_null} 83 | result: Int16([0, 21, 42, 63, 84]) 84 | 85 | ast: get(array::Array>{not_null}, idx::UInt8{not_null}) 86 | expr: get>, Int16>(array{not_null}, cast(idx){not_null}) 87 | type: Array 88 | property: {not_null} 89 | result: Array { array: Int16([0, 1, 2, 3, 4, 25, 26, 27, 28, 29, 50, 51, 52, 53, 54]), offsets: [0, 5, 10, 15] } 90 | 91 | -------------------------------------------------------------------------------- /src/types/array.rs: -------------------------------------------------------------------------------- 1 | use std::{marker::PhantomData, ops::Range}; 2 | 3 | use arrow2::trusted_len::TrustedLen; 4 | 5 | use crate::values::{Column, Scalar}; 6 | 7 | use super::{ArgType, DataType, GenericMap, ValueType}; 8 | 9 | pub struct ArrayType(PhantomData); 10 | 11 | impl ValueType for ArrayType { 12 | type Scalar = T::Column; 13 | type ScalarRef<'a> = T::Column; 14 | type Column = (T::Column, Vec); 15 | 16 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 17 | scalar 18 | } 19 | 20 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 21 | scalar.clone() 22 | } 23 | } 24 | 25 | impl ArgType for ArrayType { 26 | type ColumnIterator<'a> = ArrayIterator<'a, T>; 27 | type ColumnBuilder = (T::ColumnBuilder, Vec); 28 | 29 | fn data_type() -> DataType { 30 | DataType::Array(Box::new(T::data_type())) 
31 | } 32 | 33 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 34 | match scalar { 35 | Scalar::Array(array) => T::try_downcast_column(array), 36 | _ => None, 37 | } 38 | } 39 | 40 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 41 | match col { 42 | Column::Array { array, offsets } => { 43 | Some((T::try_downcast_column(array)?, offsets.clone())) 44 | } 45 | _ => None, 46 | } 47 | } 48 | 49 | fn upcast_scalar(scalar: Self::Scalar) -> Scalar { 50 | Scalar::Array(T::upcast_column(scalar)) 51 | } 52 | 53 | fn upcast_column((col, offsets): Self::Column) -> Column { 54 | Column::Array { 55 | array: Box::new(T::upcast_column(col)), 56 | offsets, 57 | } 58 | } 59 | 60 | fn column_len<'a>((_, offsets): &'a Self::Column) -> usize { 61 | offsets.len() 62 | } 63 | 64 | fn index_column<'a>((col, offsets): &'a Self::Column, index: usize) -> Self::ScalarRef<'a> { 65 | T::slice_column(col, offsets[index]..offsets[index + 1]) 66 | } 67 | 68 | fn slice_column<'a>((col, offsets): &'a Self::Column, range: Range) -> Self::Column { 69 | (col.clone(), offsets[range].to_vec()) 70 | } 71 | 72 | fn iter_column<'a>((col, offsets): &'a Self::Column) -> Self::ColumnIterator<'a> { 73 | ArrayIterator { 74 | col, 75 | offsets: offsets.windows(2), 76 | } 77 | } 78 | 79 | fn create_builder(_capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { 80 | (T::create_builder(0, generics), vec![0]) 81 | } 82 | 83 | fn column_to_builder((col, offsets): Self::Column) -> Self::ColumnBuilder { 84 | (T::column_to_builder(col), offsets) 85 | } 86 | 87 | fn builder_len((_, offsets): &Self::ColumnBuilder) -> usize { 88 | offsets.len() - 1 89 | } 90 | 91 | fn push_item((builder, offsets): &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { 92 | let other_col = T::column_to_builder(item); 93 | T::append_builder(builder, &other_col); 94 | let len = T::builder_len(builder); 95 | offsets.push(len); 96 | } 97 | 98 | fn push_default((builder, offsets): &mut Self::ColumnBuilder) { 99 
| let len = T::builder_len(builder); 100 | offsets.push(len); 101 | } 102 | 103 | fn append_builder( 104 | (builder, offsets): &mut Self::ColumnBuilder, 105 | (other_builder, other_offsets): &Self::ColumnBuilder, 106 | ) { 107 | let end = offsets.last().cloned().unwrap(); 108 | offsets.extend(other_offsets.iter().skip(1).map(|offset| offset + end)); 109 | T::append_builder(builder, other_builder); 110 | } 111 | 112 | fn build_column((builder, offsets): Self::ColumnBuilder) -> Self::Column { 113 | // TODO: check that they have same length 114 | (T::build_column(builder), offsets) 115 | } 116 | 117 | fn build_scalar((builder, offsets): Self::ColumnBuilder) -> Self::Scalar { 118 | assert_eq!(offsets.len(), 2); 119 | T::slice_column(&T::build_column(builder), offsets[0]..offsets[1]) 120 | } 121 | } 122 | 123 | pub struct ArrayIterator<'a, T: ArgType> { 124 | col: &'a T::Column, 125 | offsets: std::slice::Windows<'a, usize>, 126 | } 127 | 128 | impl<'a, T: ArgType> Iterator for ArrayIterator<'a, T> { 129 | type Item = T::Column; 130 | 131 | fn next(&mut self) -> Option { 132 | self.offsets 133 | .next() 134 | .map(|range| T::slice_column(self.col, range[0]..range[1])) 135 | } 136 | 137 | fn size_hint(&self) -> (usize, Option) { 138 | self.offsets.size_hint() 139 | } 140 | } 141 | 142 | unsafe impl<'a, T: ArgType> TrustedLen for ArrayIterator<'a, T> {} 143 | -------------------------------------------------------------------------------- /src/types/nullable.rs: -------------------------------------------------------------------------------- 1 | use std::{marker::PhantomData, ops::Range}; 2 | 3 | use arrow2::{ 4 | bitmap::{Bitmap, MutableBitmap}, 5 | trusted_len::TrustedLen, 6 | }; 7 | 8 | use crate::{ 9 | util::bitmap_into_mut, 10 | values::{Column, Scalar}, 11 | }; 12 | 13 | use super::{ArgType, DataType, GenericMap, ValueType}; 14 | 15 | pub struct NullableType(PhantomData); 16 | 17 | impl ValueType for NullableType { 18 | type Scalar = Option; 19 | type ScalarRef<'a> 
= Option>; 20 | type Column = (T::Column, Bitmap); 21 | 22 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 23 | scalar.map(T::to_owned_scalar) 24 | } 25 | 26 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 27 | scalar.as_ref().map(T::to_scalar_ref) 28 | } 29 | } 30 | 31 | impl ArgType for NullableType { 32 | type ColumnIterator<'a> = NullableIterator<'a, T>; 33 | type ColumnBuilder = (T::ColumnBuilder, MutableBitmap); 34 | 35 | fn data_type() -> DataType { 36 | DataType::Nullable(Box::new(T::data_type())) 37 | } 38 | 39 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 40 | match scalar { 41 | Scalar::Null => Some(None), 42 | scalar => Some(Some(T::try_downcast_scalar(scalar)?)), 43 | } 44 | } 45 | 46 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 47 | match col { 48 | Column::Nullable { column, validity } => { 49 | Some((T::try_downcast_column(column)?, validity.clone())) 50 | } 51 | _ => None, 52 | } 53 | } 54 | 55 | fn upcast_scalar(scalar: Self::Scalar) -> Scalar { 56 | match scalar { 57 | Some(scalar) => T::upcast_scalar(scalar), 58 | None => Scalar::Null, 59 | } 60 | } 61 | 62 | fn upcast_column((col, validity): Self::Column) -> Column { 63 | Column::Nullable { 64 | column: Box::new(T::upcast_column(col)), 65 | validity, 66 | } 67 | } 68 | 69 | fn column_len<'a>((_, validity): &'a Self::Column) -> usize { 70 | validity.len() 71 | } 72 | 73 | fn index_column<'a>((col, validity): &'a Self::Column, index: usize) -> Self::ScalarRef<'a> { 74 | let scalar = T::index_column(col, index); 75 | if validity.get(index).unwrap() { 76 | Some(scalar) 77 | } else { 78 | None 79 | } 80 | } 81 | 82 | fn slice_column<'a>((col, validity): &'a Self::Column, range: Range) -> Self::Column { 83 | (T::slice_column(col, range), validity.clone()) 84 | } 85 | 86 | fn iter_column<'a>((col, validity): &'a Self::Column) -> Self::ColumnIterator<'a> { 87 | NullableIterator { 88 | iter: T::iter_column(col), 89 | 
validity: validity.iter(), 90 | } 91 | } 92 | 93 | fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { 94 | ( 95 | T::create_builder(capacity, generics), 96 | MutableBitmap::with_capacity(capacity), 97 | ) 98 | } 99 | 100 | fn column_to_builder((col, validity): Self::Column) -> Self::ColumnBuilder { 101 | (T::column_to_builder(col), bitmap_into_mut(validity)) 102 | } 103 | 104 | fn builder_len((_, validity): &Self::ColumnBuilder) -> usize { 105 | validity.len() 106 | } 107 | 108 | fn push_item((col, validity): &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { 109 | match item { 110 | Some(scalar) => { 111 | T::push_item(col, scalar); 112 | validity.push(true); 113 | } 114 | None => { 115 | T::push_default(col); 116 | validity.push(false); 117 | } 118 | } 119 | } 120 | 121 | fn push_default((col, validity): &mut Self::ColumnBuilder) { 122 | T::push_default(col); 123 | validity.push(false); 124 | } 125 | 126 | fn append_builder( 127 | (col, validity): &mut Self::ColumnBuilder, 128 | (other_col, other_nulls): &Self::ColumnBuilder, 129 | ) { 130 | T::append_builder(col, other_col); 131 | validity.extend_from_slice(other_nulls.as_slice(), 0, other_nulls.len()); 132 | } 133 | 134 | fn build_column((col, validity): Self::ColumnBuilder) -> Self::Column { 135 | // TODO: check that they have same length 136 | (T::build_column(col), validity.into()) 137 | } 138 | 139 | fn build_scalar((col, validity): Self::ColumnBuilder) -> Self::Scalar { 140 | assert_eq!(validity.len(), 1); 141 | if validity.get(0) { 142 | Some(T::build_scalar(col)) 143 | } else { 144 | None 145 | } 146 | } 147 | } 148 | 149 | pub struct NullableIterator<'a, T: ArgType> { 150 | iter: T::ColumnIterator<'a>, 151 | validity: arrow2::bitmap::utils::BitmapIter<'a>, 152 | } 153 | 154 | impl<'a, T: ArgType> Iterator for NullableIterator<'a, T> { 155 | type Item = Option>; 156 | 157 | fn next(&mut self) -> Option { 158 | self.iter.next().zip(self.validity.next()).map( 159 | 
|(scalar, is_null)| { 160 | if is_null { 161 | None 162 | } else { 163 | Some(scalar) 164 | } 165 | }, 166 | ) 167 | } 168 | 169 | fn size_hint(&self) -> (usize, Option) { 170 | assert_eq!(self.iter.size_hint(), self.validity.size_hint()); 171 | self.validity.size_hint() 172 | } 173 | } 174 | 175 | unsafe impl<'a, T: ArgType> TrustedLen for NullableIterator<'a, T> {} 176 | -------------------------------------------------------------------------------- /src/display.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Display, Formatter}; 2 | 3 | use crate::{ 4 | expr::{Expr, Literal, AST}, 5 | property::ValueProperty, 6 | types::{DataType, ValueType}, 7 | values::{Value, ValueRef}, 8 | }; 9 | 10 | impl Display for AST { 11 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 12 | match self { 13 | AST::Literal(literal) => write!(f, "{literal}"), 14 | AST::ColumnRef { 15 | name, 16 | data_type, 17 | property, 18 | } => write!(f, "{name}::{data_type}{property}"), 19 | AST::FunctionCall { name, args, params } => { 20 | write!(f, "{name}")?; 21 | if !params.is_empty() { 22 | write!(f, "(")?; 23 | for (i, param) in params.iter().enumerate() { 24 | if i > 0 { 25 | write!(f, ", ")?; 26 | } 27 | write!(f, "{param}")?; 28 | } 29 | write!(f, ")")?; 30 | } 31 | write!(f, "(")?; 32 | for (i, arg) in args.iter().enumerate() { 33 | if i > 0 { 34 | write!(f, ", ")?; 35 | } 36 | write!(f, "{arg}")?; 37 | } 38 | write!(f, ")") 39 | } 40 | } 41 | } 42 | } 43 | 44 | impl Display for Literal { 45 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 46 | match self { 47 | Literal::Null => write!(f, "NULL"), 48 | Literal::Boolean(val) => write!(f, "{val}::Boolean"), 49 | Literal::UInt8(val) => write!(f, "{val}::UInt8"), 50 | Literal::UInt16(val) => write!(f, "{val}::UInt16"), 51 | Literal::Int8(val) => write!(f, "{val}::Int8"), 52 | Literal::Int16(val) => write!(f, "{val}::Int16"), 53 | 
Literal::String(val) => write!(f, "{}::String", String::from_utf8_lossy(val)), 54 | } 55 | } 56 | } 57 | 58 | impl Display for DataType { 59 | fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { 60 | match &self { 61 | DataType::Boolean => write!(f, "Boolean"), 62 | DataType::String => write!(f, "String"), 63 | DataType::UInt8 => write!(f, "UInt8"), 64 | DataType::UInt16 => write!(f, "UInt16"), 65 | DataType::Int8 => write!(f, "Int8"), 66 | DataType::Int16 => write!(f, "Int16"), 67 | DataType::Null => write!(f, "Nullable"), 68 | DataType::Nullable(inner) => write!(f, "Nullable<{inner}>"), 69 | DataType::EmptyArray => write!(f, "Array"), 70 | DataType::Array(inner) => write!(f, "Array<{inner}>"), 71 | DataType::Tuple(tys) => { 72 | if tys.len() == 1 { 73 | write!(f, "({},)", tys[0]) 74 | } else { 75 | write!(f, "(")?; 76 | for (i, ty) in tys.iter().enumerate() { 77 | if i > 0 { 78 | write!(f, ", ")?; 79 | } 80 | write!(f, "{ty}")?; 81 | } 82 | write!(f, ")") 83 | } 84 | } 85 | DataType::Generic(index) => write!(f, "T{index}"), 86 | } 87 | } 88 | } 89 | 90 | impl Display for ValueProperty { 91 | fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { 92 | if self.not_null { 93 | write!(f, "{{not_null}}")?; 94 | } else { 95 | write!(f, "{{}}")?; 96 | } 97 | Ok(()) 98 | } 99 | } 100 | 101 | impl Display for Expr { 102 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 103 | match self { 104 | Expr::Literal(literal) => write!(f, "{literal}"), 105 | Expr::ColumnRef { name } => write!(f, "{name}"), 106 | Expr::FunctionCall { 107 | function, 108 | args, 109 | generics, 110 | .. 
111 | } => { 112 | write!(f, "{}", function.signature.name)?; 113 | if !generics.is_empty() { 114 | write!(f, "<")?; 115 | for (i, ty) in generics.iter().enumerate() { 116 | if i > 0 { 117 | write!(f, ", ")?; 118 | } 119 | write!(f, "T{i}={ty}")?; 120 | } 121 | write!(f, ">")?; 122 | } 123 | write!(f, "<")?; 124 | for (i, ty) in function.signature.args_type.iter().enumerate() { 125 | if i > 0 { 126 | write!(f, ", ")?; 127 | } 128 | write!(f, "{ty}")?; 129 | } 130 | write!(f, ">")?; 131 | write!(f, "(")?; 132 | for (i, (arg, prop)) in args.iter().enumerate() { 133 | if i > 0 { 134 | write!(f, ", ")?; 135 | } 136 | write!(f, "{arg}{prop}")?; 137 | } 138 | write!(f, ")") 139 | } 140 | Expr::Cast { expr, dest_type } => { 141 | write!(f, "cast({expr})") 142 | } 143 | } 144 | } 145 | } 146 | 147 | impl Display for Value { 148 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 149 | match self { 150 | Value::Scalar(scalar) => write!(f, "{:?}", scalar), 151 | Value::Column(col) => write!(f, "{:?}", col), 152 | } 153 | } 154 | } 155 | 156 | impl<'a, T: ValueType> Display for ValueRef<'a, T> { 157 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 158 | match self { 159 | ValueRef::Scalar(scalar) => write!(f, "{:?}", scalar), 160 | ValueRef::Column(col) => write!(f, "{:?}", col), 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/types/number.rs: -------------------------------------------------------------------------------- 1 | use std::{marker::PhantomData, ops::Range}; 2 | 3 | use arrow2::{buffer::Buffer, types::NativeType}; 4 | 5 | use crate::{ 6 | util::buffer_into_mut, 7 | values::{Column, Scalar}, 8 | }; 9 | 10 | use super::{ArgType, DataType, GenericMap, ValueType}; 11 | 12 | pub trait Number: 'static { 13 | type Storage: NativeType; 14 | 15 | fn data_type() -> DataType; 16 | fn try_downcast_scalar(scalar: &Scalar) -> Option; 17 | fn try_downcast_column(col: &Column) -> Option>; 18 | fn 
upcast_scalar(scalar: Self::Storage) -> Scalar; 19 | fn upcast_column(col: Buffer) -> Column; 20 | } 21 | 22 | pub struct NumberType(PhantomData); 23 | 24 | impl ValueType for NumberType { 25 | type Scalar = Int::Storage; 26 | type ScalarRef<'a> = Int::Storage; 27 | type Column = Buffer; 28 | 29 | fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { 30 | scalar 31 | } 32 | 33 | fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a> { 34 | *scalar 35 | } 36 | } 37 | 38 | impl ArgType for NumberType { 39 | type ColumnIterator<'a> = std::iter::Cloned>; 40 | type ColumnBuilder = Vec; 41 | 42 | fn data_type() -> DataType { 43 | T::data_type() 44 | } 45 | 46 | fn try_downcast_scalar<'a>(scalar: &'a Scalar) -> Option> { 47 | T::try_downcast_scalar(scalar) 48 | } 49 | 50 | fn try_downcast_column<'a>(col: &'a Column) -> Option { 51 | T::try_downcast_column(col) 52 | } 53 | 54 | fn upcast_scalar(scalar: Self::Scalar) -> Scalar { 55 | T::upcast_scalar(scalar) 56 | } 57 | 58 | fn upcast_column(col: Self::Column) -> Column { 59 | T::upcast_column(col) 60 | } 61 | 62 | fn column_len<'a>(col: &'a Self::Column) -> usize { 63 | col.len() 64 | } 65 | 66 | fn index_column<'a>(col: &'a Self::Column, index: usize) -> Self::ScalarRef<'a> { 67 | col[index] 68 | } 69 | 70 | fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { 71 | col.clone().slice(range.start, range.end - range.start) 72 | } 73 | 74 | fn iter_column<'a>(col: &'a Self::Column) -> Self::ColumnIterator<'a> { 75 | col.iter().cloned() 76 | } 77 | 78 | fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { 79 | iter.collect() 80 | } 81 | 82 | fn create_builder(capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { 83 | Vec::with_capacity(capacity) 84 | } 85 | 86 | fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder { 87 | buffer_into_mut(col) 88 | } 89 | 90 | fn builder_len(builder: &Self::ColumnBuilder) -> usize { 91 | 
builder.len() 92 | } 93 | 94 | fn push_item(builder: &mut Self::ColumnBuilder, item: Self::Scalar) { 95 | builder.push(item); 96 | } 97 | 98 | fn push_default(builder: &mut Self::ColumnBuilder) { 99 | builder.push(T::Storage::default()); 100 | } 101 | 102 | fn append_builder(builder: &mut Self::ColumnBuilder, other_builder: &Self::ColumnBuilder) { 103 | builder.extend_from_slice(other_builder); 104 | } 105 | 106 | fn build_column(builder: Self::ColumnBuilder) -> Self::Column { 107 | builder.into() 108 | } 109 | 110 | fn build_scalar(builder: Self::ColumnBuilder) -> Self::Scalar { 111 | assert_eq!(builder.len(), 1); 112 | builder[0] 113 | } 114 | } 115 | 116 | impl Number for u8 { 117 | type Storage = u8; 118 | 119 | fn data_type() -> DataType { 120 | DataType::UInt8 121 | } 122 | 123 | fn try_downcast_scalar(scalar: &Scalar) -> Option { 124 | scalar.as_u_int8().cloned() 125 | } 126 | 127 | fn try_downcast_column(col: &Column) -> Option> { 128 | col.as_u_int8().cloned() 129 | } 130 | 131 | fn upcast_scalar(scalar: Self::Storage) -> Scalar { 132 | Scalar::UInt8(scalar) 133 | } 134 | 135 | fn upcast_column(col: Buffer) -> Column { 136 | Column::UInt8(col) 137 | } 138 | } 139 | 140 | impl Number for u16 { 141 | type Storage = u16; 142 | 143 | fn data_type() -> DataType { 144 | DataType::UInt16 145 | } 146 | 147 | fn try_downcast_scalar(scalar: &Scalar) -> Option { 148 | scalar.as_u_int16().cloned() 149 | } 150 | 151 | fn try_downcast_column(col: &Column) -> Option> { 152 | col.as_u_int16().cloned() 153 | } 154 | 155 | fn upcast_scalar(scalar: Self::Storage) -> Scalar { 156 | Scalar::UInt16(scalar) 157 | } 158 | 159 | fn upcast_column(col: Buffer) -> Column { 160 | Column::UInt16(col) 161 | } 162 | } 163 | 164 | impl Number for i8 { 165 | type Storage = i8; 166 | 167 | fn data_type() -> DataType { 168 | DataType::Int8 169 | } 170 | 171 | fn try_downcast_scalar(scalar: &Scalar) -> Option { 172 | scalar.as_int8().cloned() 173 | } 174 | 175 | fn try_downcast_column(col: 
&Column) -> Option> { 176 | col.as_int8().cloned() 177 | } 178 | 179 | fn upcast_scalar(scalar: Self::Storage) -> Scalar { 180 | Scalar::Int8(scalar) 181 | } 182 | 183 | fn upcast_column(col: Buffer) -> Column { 184 | Column::Int8(col) 185 | } 186 | } 187 | 188 | impl Number for i16 { 189 | type Storage = i16; 190 | 191 | fn data_type() -> DataType { 192 | DataType::Int16 193 | } 194 | 195 | fn try_downcast_scalar(scalar: &Scalar) -> Option { 196 | scalar.as_int16().cloned() 197 | } 198 | 199 | fn try_downcast_column(col: &Column) -> Option> { 200 | col.as_int16().cloned() 201 | } 202 | 203 | fn upcast_scalar(scalar: Self::Storage) -> Scalar { 204 | Scalar::Int16(scalar) 205 | } 206 | 207 | fn upcast_column(col: Buffer) -> Column { 208 | Column::Int16(col) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/runtime.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{ 4 | expr::{Expr, Literal}, 5 | types::{any::AnyType, DataType}, 6 | util::constant_bitmap, 7 | values::{Column, Value}, 8 | values::{ColumnBuilder, Scalar}, 9 | }; 10 | 11 | pub struct Runtime { 12 | pub columns: HashMap, 13 | } 14 | 15 | impl Runtime { 16 | pub fn run(&self, expr: &Expr) -> Value { 17 | match expr { 18 | Expr::Literal(lit) => Value::Scalar(self.run_lit(lit)), 19 | Expr::ColumnRef { name } => Value::Column(self.columns[name].clone()), 20 | Expr::FunctionCall { 21 | function, 22 | args, 23 | generics, 24 | .. 
25 | } => { 26 | let cols = args 27 | .iter() 28 | .map(|(expr, _)| self.run(expr)) 29 | .collect::>(); 30 | let cols_ref = cols.iter().map(Value::as_ref).collect::>(); 31 | (function.eval)(cols_ref.as_slice(), generics) 32 | } 33 | Expr::Cast { expr, dest_type } => { 34 | let value = self.run(expr); 35 | // TODO: remove me 36 | let desc_value = format!("{}", value); 37 | self.run_cast(value, dest_type) 38 | .unwrap_or_else(|| panic!("{desc_value} can not be cast to {dest_type}")) 39 | } 40 | } 41 | } 42 | 43 | pub fn run_cast(&self, input: Value, dest_type: &DataType) -> Option> { 44 | match input { 45 | Value::Scalar(scalar) => match (scalar, dest_type) { 46 | (Scalar::Null, DataType::Nullable(_)) => Some(Value::Scalar(Scalar::Null)), 47 | (Scalar::EmptyArray, DataType::Array(dest_ty)) => { 48 | let column = ColumnBuilder::with_capacity(dest_ty, 0).build(); 49 | Some(Value::Scalar(Scalar::Array(column))) 50 | } 51 | (scalar, DataType::Nullable(dest_ty)) => { 52 | self.run_cast(Value::Scalar(scalar), dest_ty) 53 | } 54 | (Scalar::Array(array), DataType::Array(dest_ty)) => { 55 | let array = self 56 | .run_cast(Value::Column(array), dest_ty)? 
57 | .into_column() 58 | .ok() 59 | .unwrap(); 60 | Some(Value::Scalar(Scalar::Array(array))) 61 | } 62 | (Scalar::UInt8(val), DataType::UInt16) => { 63 | Some(Value::Scalar(Scalar::UInt16(val as u16))) 64 | } 65 | (Scalar::Int8(val), DataType::Int16) => { 66 | Some(Value::Scalar(Scalar::Int16(val as i16))) 67 | } 68 | (Scalar::UInt8(val), DataType::Int16) => { 69 | Some(Value::Scalar(Scalar::Int16(val as i16))) 70 | } 71 | (scalar @ Scalar::Boolean(_), DataType::Boolean) 72 | | (scalar @ Scalar::String(_), DataType::String) 73 | | (scalar @ Scalar::UInt8(_), DataType::UInt8) 74 | | (scalar @ Scalar::Int8(_), DataType::Int8) 75 | | (scalar @ Scalar::Int16(_), DataType::Int16) 76 | | (scalar @ Scalar::Null, DataType::Null) 77 | | (scalar @ Scalar::EmptyArray, DataType::EmptyArray) => { 78 | Some(Value::Scalar(scalar)) 79 | } 80 | _ => None, 81 | }, 82 | Value::Column(col) => match (col, dest_type) { 83 | (Column::Null { len }, DataType::Nullable(dest_ty)) => { 84 | Some(Value::Column(Column::Nullable { 85 | column: Box::new(ColumnBuilder::with_capacity(dest_ty, len).build()), 86 | validity: constant_bitmap(false, len).into(), 87 | })) 88 | } 89 | (Column::EmptyArray { len }, DataType::Array(dest_ty)) => { 90 | Some(Value::Column(Column::Array { 91 | array: Box::new(ColumnBuilder::with_capacity(dest_ty, 0).build()), 92 | offsets: vec![0; len + 1], 93 | })) 94 | } 95 | (Column::Nullable { column, validity }, DataType::Nullable(dest_ty)) => { 96 | let column = self 97 | .run_cast(Value::Column(*column), &*dest_ty)? 98 | .into_column() 99 | .ok() 100 | .unwrap(); 101 | Some(Value::Column(Column::Nullable { 102 | column: Box::new(column), 103 | validity, 104 | })) 105 | } 106 | (col, DataType::Nullable(dest_ty)) => { 107 | let column = self 108 | .run_cast(Value::Column(col), &*dest_ty)? 
109 | .into_column() 110 | .ok() 111 | .unwrap(); 112 | Some(Value::Column(Column::Nullable { 113 | validity: constant_bitmap(true, column.len()).into(), 114 | column: Box::new(column), 115 | })) 116 | } 117 | (Column::Array { array, offsets }, DataType::Array(dest_ty)) => { 118 | let array = self 119 | .run_cast(Value::Column(*array), &*dest_ty)? 120 | .into_column() 121 | .ok() 122 | .unwrap(); 123 | Some(Value::Column(Column::Array { 124 | array: Box::new(array), 125 | offsets, 126 | })) 127 | } 128 | (Column::UInt8(column), DataType::UInt16) => Some(Value::Column(Column::UInt16( 129 | column.iter().map(|v| *v as u16).collect(), 130 | ))), 131 | (Column::Int8(column), DataType::Int16) => Some(Value::Column(Column::Int16( 132 | column.iter().map(|v| *v as i16).collect(), 133 | ))), 134 | (Column::UInt8(column), DataType::Int16) => Some(Value::Column(Column::Int16( 135 | column.iter().map(|v| *v as i16).collect(), 136 | ))), 137 | (col @ Column::Boolean(_), DataType::Boolean) 138 | | (col @ Column::String { .. }, DataType::String) 139 | | (col @ Column::UInt8(_), DataType::UInt8) 140 | | (col @ Column::Int8(_), DataType::Int8) 141 | | (col @ Column::Int16(_), DataType::Int16) 142 | | (col @ Column::Null { .. }, DataType::Null) 143 | | (col @ Column::EmptyArray { .. 
}, DataType::EmptyArray) => { 144 | Some(Value::Column(col)) 145 | } 146 | _ => None, 147 | }, 148 | } 149 | } 150 | 151 | pub fn run_lit(&self, lit: &Literal) -> Scalar { 152 | match lit { 153 | Literal::Null => Scalar::Null, 154 | Literal::Int8(val) => Scalar::Int8(*val), 155 | Literal::Int16(val) => Scalar::Int16(*val), 156 | Literal::UInt8(val) => Scalar::UInt8(*val), 157 | Literal::UInt16(val) => Scalar::UInt16(*val), 158 | Literal::Boolean(val) => Scalar::Boolean(*val), 159 | Literal::String(val) => Scalar::String(val.clone()), 160 | } 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/type_check.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{ 4 | expr::{Expr, Literal, AST}, 5 | function::{FunctionRegistry, FunctionSignature}, 6 | property::ValueProperty, 7 | types::DataType, 8 | }; 9 | 10 | pub fn check(ast: &AST, fn_registry: &FunctionRegistry) -> Option<(Expr, DataType, ValueProperty)> { 11 | match ast { 12 | AST::Literal(lit) => { 13 | let (ty, prop) = check_literal(lit); 14 | Some((Expr::Literal(lit.clone()), ty, prop)) 15 | } 16 | AST::ColumnRef { 17 | name, 18 | data_type, 19 | property, 20 | } => Some(( 21 | Expr::ColumnRef { name: name.clone() }, 22 | data_type.clone(), 23 | *property, 24 | )), 25 | AST::FunctionCall { name, args, params } => { 26 | let (mut args_expr, mut args_type, mut args_prop) = 27 | (Vec::new(), Vec::new(), Vec::new()); 28 | 29 | for arg in args { 30 | let (arg, ty, prop) = check(arg, fn_registry)?; 31 | args_expr.push(arg); 32 | args_type.push(ty); 33 | args_prop.push(prop); 34 | } 35 | 36 | check_function( 37 | name, 38 | params, 39 | &args_expr, 40 | &args_type, 41 | &args_prop, 42 | fn_registry, 43 | ) 44 | } 45 | } 46 | } 47 | 48 | pub fn check_literal(literal: &Literal) -> (DataType, ValueProperty) { 49 | match literal { 50 | Literal::Null => (DataType::Null, 
ValueProperty::default()), 51 | Literal::Int8(_) => (DataType::Int8, ValueProperty::default().not_null(true)), 52 | Literal::Int16(_) => (DataType::Int16, ValueProperty::default().not_null(true)), 53 | Literal::UInt8(_) => (DataType::UInt8, ValueProperty::default().not_null(true)), 54 | Literal::UInt16(_) => (DataType::UInt16, ValueProperty::default().not_null(true)), 55 | Literal::Boolean(_) => (DataType::Boolean, ValueProperty::default().not_null(true)), 56 | Literal::String(_) => (DataType::String, ValueProperty::default().not_null(true)), 57 | } 58 | } 59 | 60 | pub fn check_function( 61 | name: &str, 62 | params: &[usize], 63 | args: &[Expr], 64 | args_type: &[DataType], 65 | args_prop: &[ValueProperty], 66 | fn_registry: &FunctionRegistry, 67 | ) -> Option<(Expr, DataType, ValueProperty)> { 68 | for (id, func) in fn_registry.search_candidates(name, params, args_type) { 69 | if let Some((checked_args, return_ty, generics, prop)) = 70 | try_check_function(args, args_type, args_prop, &func.signature) 71 | { 72 | return Some(( 73 | Expr::FunctionCall { 74 | id, 75 | function: func.clone(), 76 | generics, 77 | args: checked_args, 78 | }, 79 | return_ty, 80 | prop, 81 | )); 82 | } 83 | } 84 | 85 | None 86 | } 87 | 88 | #[derive(Debug)] 89 | pub struct Subsitution(pub HashMap); 90 | 91 | impl Subsitution { 92 | pub fn empty() -> Self { 93 | Subsitution(HashMap::new()) 94 | } 95 | 96 | pub fn equation(idx: usize, ty: DataType) -> Self { 97 | let mut subst = Self::empty(); 98 | subst.0.insert(idx, ty); 99 | subst 100 | } 101 | 102 | pub fn merge(mut self, other: Self) -> Option { 103 | for (idx, ty1) in other.0 { 104 | if let Some(ty2) = self.0.remove(&idx) { 105 | let common_ty = common_super_type(ty1, ty2)?; 106 | self.0.insert(idx, common_ty); 107 | } else { 108 | self.0.insert(idx, ty1); 109 | } 110 | } 111 | 112 | Some(self) 113 | } 114 | 115 | pub fn apply(&self, ty: DataType) -> Option { 116 | match ty { 117 | DataType::Generic(idx) => 
self.0.get(&idx).cloned(), 118 | DataType::Nullable(box ty) => Some(DataType::Nullable(Box::new(self.apply(ty)?))), 119 | DataType::Array(box ty) => Some(DataType::Array(Box::new(self.apply(ty)?))), 120 | ty => Some(ty), 121 | } 122 | } 123 | } 124 | 125 | #[allow(clippy::type_complexity)] 126 | pub fn try_check_function( 127 | args: &[Expr], 128 | args_type: &[DataType], 129 | args_prop: &[ValueProperty], 130 | sig: &FunctionSignature, 131 | ) -> Option<( 132 | Vec<(Expr, ValueProperty)>, 133 | DataType, 134 | Vec, 135 | ValueProperty, 136 | )> { 137 | assert_eq!(args.len(), sig.args_type.len()); 138 | 139 | let substs = args_type 140 | .iter() 141 | .zip(&sig.args_type) 142 | .map(|(src_ty, dest_ty)| unify(src_ty, dest_ty)) 143 | .collect::>>()?; 144 | let subst = substs 145 | .into_iter() 146 | .try_reduce(|subst1, subst2| subst1.merge(subst2))? 147 | .unwrap_or_else(Subsitution::empty); 148 | 149 | let checked_args = args 150 | .iter() 151 | .zip(args_prop) 152 | .zip(args_type) 153 | .zip(&sig.args_type) 154 | .map(|(((arg, arg_prop), arg_type), sig_type)| { 155 | let sig_type = subst.apply(sig_type.clone())?; 156 | Some(if *arg_type == sig_type { 157 | (arg.clone(), *arg_prop) 158 | } else { 159 | ( 160 | Expr::Cast { 161 | expr: Box::new(arg.clone()), 162 | dest_type: sig_type, 163 | }, 164 | // TODO: does cast really preserve_not_null? 
165 | ValueProperty::default().not_null(arg_prop.not_null), 166 | ) 167 | }) 168 | }) 169 | .collect::>>()?; 170 | 171 | let return_type = subst.apply(sig.return_type.clone())?; 172 | 173 | let generics = subst 174 | .0 175 | .keys() 176 | .cloned() 177 | .max() 178 | .map(|max_generic_idx| { 179 | (0..max_generic_idx + 1) 180 | .map(|idx| match subst.0.get(&idx) { 181 | Some(ty) => ty.clone(), 182 | None => DataType::Generic(idx), 183 | }) 184 | .collect() 185 | }) 186 | .unwrap_or_default(); 187 | 188 | let not_null = (return_type.as_nullable().is_none() && !return_type.is_null()) 189 | || (sig.property.preserve_not_null && args_prop.iter().all(|prop| prop.not_null)); 190 | let prop = ValueProperty::default().not_null(not_null); 191 | 192 | Some((checked_args, return_type, generics, prop)) 193 | } 194 | 195 | pub fn unify(src_ty: &DataType, dest_ty: &DataType) -> Option { 196 | match (src_ty, dest_ty) { 197 | (DataType::Generic(_), _) => unreachable!("source type must not contain generic type"), 198 | (ty, DataType::Generic(idx)) => Some(Subsitution::equation(*idx, ty.clone())), 199 | (DataType::Null, DataType::Nullable(_)) => Some(Subsitution::empty()), 200 | (DataType::EmptyArray, DataType::Array(_)) => Some(Subsitution::empty()), 201 | (DataType::Nullable(src_ty), DataType::Nullable(dest_ty)) => unify(src_ty, dest_ty), 202 | (src_ty, DataType::Nullable(dest_ty)) => unify(src_ty, dest_ty), 203 | (DataType::Array(src_ty), DataType::Array(dest_ty)) => unify(src_ty, dest_ty), 204 | (DataType::Tuple(src_tys), DataType::Tuple(dest_tys)) 205 | if src_tys.len() == dest_tys.len() => 206 | { 207 | let substs = src_tys 208 | .iter() 209 | .zip(dest_tys) 210 | .map(|(src_ty, dest_ty)| unify(src_ty, dest_ty)) 211 | .collect::>>()?; 212 | let subst = substs 213 | .into_iter() 214 | .try_reduce(|subst1, subst2| subst1.merge(subst2))? 
215 | .unwrap_or_else(Subsitution::empty); 216 | Some(subst) 217 | } 218 | (src_ty, dest_ty) if can_cast_to(src_ty, dest_ty) => Some(Subsitution::empty()), 219 | _ => None, 220 | } 221 | } 222 | 223 | pub fn can_cast_to(src_ty: &DataType, dest_ty: &DataType) -> bool { 224 | match (src_ty, dest_ty) { 225 | (src_ty, dest_ty) if src_ty == dest_ty => true, 226 | (DataType::Null, DataType::Nullable(_)) => true, 227 | (DataType::EmptyArray, DataType::Array(_)) => true, 228 | (DataType::Nullable(src_ty), DataType::Nullable(dest_ty)) => can_cast_to(src_ty, dest_ty), 229 | (src_ty, DataType::Nullable(dest_ty)) => can_cast_to(src_ty, dest_ty), 230 | (DataType::Array(src_ty), DataType::Array(dest_ty)) => can_cast_to(src_ty, dest_ty), 231 | (DataType::UInt8, DataType::UInt16) 232 | | (DataType::Int8, DataType::Int16) 233 | | (DataType::UInt8, DataType::Int16) => true, 234 | _ => false, 235 | } 236 | } 237 | 238 | pub fn common_super_type(ty1: DataType, ty2: DataType) -> Option { 239 | match (ty1, ty2) { 240 | (ty1, ty2) if ty1 == ty2 => Some(ty1), 241 | (DataType::Null, ty @ DataType::Nullable(_)) 242 | | (ty @ DataType::Nullable(_), DataType::Null) => Some(ty), 243 | (DataType::Null, ty) | (ty, DataType::Null) => Some(DataType::Nullable(Box::new(ty))), 244 | (DataType::Nullable(box ty1), DataType::Nullable(box ty2)) 245 | | (DataType::Nullable(box ty1), ty2) 246 | | (ty1, DataType::Nullable(box ty2)) => { 247 | Some(DataType::Nullable(Box::new(common_super_type(ty1, ty2)?))) 248 | } 249 | (DataType::EmptyArray, ty @ DataType::Array(_)) 250 | | (ty @ DataType::Array(_), DataType::EmptyArray) => Some(ty), 251 | (DataType::Array(box ty1), DataType::Array(box ty2)) => { 252 | Some(DataType::Array(Box::new(common_super_type(ty1, ty2)?))) 253 | } 254 | (DataType::UInt8, DataType::UInt16) | (DataType::UInt16, DataType::UInt8) => { 255 | Some(DataType::UInt16) 256 | } 257 | (DataType::Int8, DataType::Int16) | (DataType::Int16, DataType::Int8) => { 258 | Some(DataType::Int16) 259 | } 
260 | (DataType::Int16, DataType::UInt8) | (DataType::UInt8, DataType::Int16) => { 261 | Some(DataType::Int16) 262 | } 263 | _ => None, 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "arrow2" 7 | version = "0.12.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "5feafd6df4e3f577529e6aa2b9b7cdb3c9fe8e8f66ebc8dc29abbe71a7e968f0" 10 | dependencies = [ 11 | "bytemuck", 12 | "chrono", 13 | "either", 14 | "hash_hasher", 15 | "num-traits", 16 | "simdutf8", 17 | ] 18 | 19 | [[package]] 20 | name = "autocfg" 21 | version = "1.1.0" 22 | source = "registry+https://github.com/rust-lang/crates.io-index" 23 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 24 | 25 | [[package]] 26 | name = "bitflags" 27 | version = "1.3.2" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 30 | 31 | [[package]] 32 | name = "bstr" 33 | version = "0.2.17" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" 36 | dependencies = [ 37 | "lazy_static", 38 | "memchr", 39 | "regex-automata", 40 | ] 41 | 42 | [[package]] 43 | name = "bytemuck" 44 | version = "1.10.0" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "c53dfa917ec274df8ed3c572698f381a24eef2efba9492d797301b72b6db408a" 47 | dependencies = [ 48 | "bytemuck_derive", 49 | ] 50 | 51 | [[package]] 52 | name = "bytemuck_derive" 53 | version = "1.1.0" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = 
"562e382481975bc61d11275ac5e62a19abd00b0547d99516a415336f183dcd0e" 56 | dependencies = [ 57 | "proc-macro2", 58 | "quote", 59 | "syn", 60 | ] 61 | 62 | [[package]] 63 | name = "cfg-if" 64 | version = "1.0.0" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 67 | 68 | [[package]] 69 | name = "chrono" 70 | version = "0.4.19" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 73 | dependencies = [ 74 | "num-integer", 75 | "num-traits", 76 | ] 77 | 78 | [[package]] 79 | name = "console" 80 | version = "0.15.0" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" 83 | dependencies = [ 84 | "encode_unicode", 85 | "libc", 86 | "once_cell", 87 | "terminal_size", 88 | "winapi", 89 | ] 90 | 91 | [[package]] 92 | name = "educe" 93 | version = "0.4.19" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "c07b7cc9cd8c08d10db74fca3b20949b9b6199725c04a0cce6d543496098fcac" 96 | dependencies = [ 97 | "enum-ordinalize", 98 | "proc-macro2", 99 | "quote", 100 | "syn", 101 | ] 102 | 103 | [[package]] 104 | name = "either" 105 | version = "1.7.0" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" 108 | 109 | [[package]] 110 | name = "encode_unicode" 111 | version = "0.3.6" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" 114 | 115 | [[package]] 116 | name = "enum-as-inner" 117 | version = "0.4.0" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73" 
120 | dependencies = [ 121 | "heck", 122 | "proc-macro2", 123 | "quote", 124 | "syn", 125 | ] 126 | 127 | [[package]] 128 | name = "enum-ordinalize" 129 | version = "3.1.11" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = "2170fc0efee383079a8bdd05d6ea2a184d2a0f07a1c1dcabdb2fd5e9f24bc36c" 132 | dependencies = [ 133 | "num-bigint", 134 | "num-traits", 135 | "proc-macro2", 136 | "quote", 137 | "rustc_version", 138 | "syn", 139 | ] 140 | 141 | [[package]] 142 | name = "fastrand" 143 | version = "1.7.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" 146 | dependencies = [ 147 | "instant", 148 | ] 149 | 150 | [[package]] 151 | name = "goldenfile" 152 | version = "1.4.3" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "03bd0e9c2ea26ce269d37016d6b95556bbfa544cbbbdeff40102ac54121c990b" 155 | dependencies = [ 156 | "similar-asserts", 157 | "tempfile", 158 | ] 159 | 160 | [[package]] 161 | name = "hash_hasher" 162 | version = "2.0.3" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" 165 | 166 | [[package]] 167 | name = "heck" 168 | version = "0.4.0" 169 | source = "registry+https://github.com/rust-lang/crates.io-index" 170 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" 171 | 172 | [[package]] 173 | name = "instant" 174 | version = "0.1.12" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 177 | dependencies = [ 178 | "cfg-if", 179 | ] 180 | 181 | [[package]] 182 | name = "lazy_static" 183 | version = "1.4.0" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 186 | 187 | [[package]] 188 | name = "libc" 189 | version = "0.2.126" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 192 | 193 | [[package]] 194 | name = "memchr" 195 | version = "2.5.0" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 198 | 199 | [[package]] 200 | name = "num-bigint" 201 | version = "0.4.3" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" 204 | dependencies = [ 205 | "autocfg", 206 | "num-integer", 207 | "num-traits", 208 | ] 209 | 210 | [[package]] 211 | name = "num-integer" 212 | version = "0.1.45" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 215 | dependencies = [ 216 | "autocfg", 217 | "num-traits", 218 | ] 219 | 220 | [[package]] 221 | name = "num-traits" 222 | version = "0.2.15" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 225 | dependencies = [ 226 | "autocfg", 227 | ] 228 | 229 | [[package]] 230 | name = "once_cell" 231 | version = "1.12.0" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" 234 | 235 | [[package]] 236 | name = "proc-macro2" 237 | version = "1.0.39" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" 240 | dependencies = [ 241 | "unicode-ident", 242 | ] 243 | 244 | [[package]] 245 | name = "quote" 246 | version = "1.0.18" 247 
| source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" 249 | dependencies = [ 250 | "proc-macro2", 251 | ] 252 | 253 | [[package]] 254 | name = "redox_syscall" 255 | version = "0.2.13" 256 | source = "registry+https://github.com/rust-lang/crates.io-index" 257 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 258 | dependencies = [ 259 | "bitflags", 260 | ] 261 | 262 | [[package]] 263 | name = "regex-automata" 264 | version = "0.1.10" 265 | source = "registry+https://github.com/rust-lang/crates.io-index" 266 | checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" 267 | 268 | [[package]] 269 | name = "remove_dir_all" 270 | version = "0.5.3" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 273 | dependencies = [ 274 | "winapi", 275 | ] 276 | 277 | [[package]] 278 | name = "rustc_version" 279 | version = "0.4.0" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 282 | dependencies = [ 283 | "semver", 284 | ] 285 | 286 | [[package]] 287 | name = "semver" 288 | version = "1.0.9" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "8cb243bdfdb5936c8dc3c45762a19d12ab4550cdc753bc247637d4ec35a040fd" 291 | 292 | [[package]] 293 | name = "simdutf8" 294 | version = "0.1.4" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" 297 | 298 | [[package]] 299 | name = "similar" 300 | version = "2.1.0" 301 | source = "registry+https://github.com/rust-lang/crates.io-index" 302 | checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3" 303 | dependencies = [ 304 | "bstr", 305 | 
"unicode-segmentation", 306 | ] 307 | 308 | [[package]] 309 | name = "similar-asserts" 310 | version = "1.2.0" 311 | source = "registry+https://github.com/rust-lang/crates.io-index" 312 | checksum = "64c9f531a2375031d51c23c415ca12d0f0271b976211e2f727b7a0eac06a099d" 313 | dependencies = [ 314 | "console", 315 | "similar", 316 | ] 317 | 318 | [[package]] 319 | name = "syn" 320 | version = "1.0.95" 321 | source = "registry+https://github.com/rust-lang/crates.io-index" 322 | checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" 323 | dependencies = [ 324 | "proc-macro2", 325 | "quote", 326 | "unicode-ident", 327 | ] 328 | 329 | [[package]] 330 | name = "tempfile" 331 | version = "3.3.0" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" 334 | dependencies = [ 335 | "cfg-if", 336 | "fastrand", 337 | "libc", 338 | "redox_syscall", 339 | "remove_dir_all", 340 | "winapi", 341 | ] 342 | 343 | [[package]] 344 | name = "terminal_size" 345 | version = "0.1.17" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" 348 | dependencies = [ 349 | "libc", 350 | "winapi", 351 | ] 352 | 353 | [[package]] 354 | name = "typed-type-exercise" 355 | version = "0.1.0" 356 | dependencies = [ 357 | "arrow2", 358 | "educe", 359 | "enum-as-inner", 360 | "goldenfile", 361 | ] 362 | 363 | [[package]] 364 | name = "unicode-ident" 365 | version = "1.0.0" 366 | source = "registry+https://github.com/rust-lang/crates.io-index" 367 | checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" 368 | 369 | [[package]] 370 | name = "unicode-segmentation" 371 | version = "1.9.0" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" 374 | 375 | [[package]] 376 | 
name = "winapi" 377 | version = "0.3.9" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 380 | dependencies = [ 381 | "winapi-i686-pc-windows-gnu", 382 | "winapi-x86_64-pc-windows-gnu", 383 | ] 384 | 385 | [[package]] 386 | name = "winapi-i686-pc-windows-gnu" 387 | version = "0.4.0" 388 | source = "registry+https://github.com/rust-lang/crates.io-index" 389 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 390 | 391 | [[package]] 392 | name = "winapi-x86_64-pc-windows-gnu" 393 | version = "0.4.0" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 396 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(generic_associated_types)] 2 | #![feature(iterator_try_reduce)] 3 | #![feature(box_patterns)] 4 | #![feature(associated_type_defaults)] 5 | #![allow(clippy::len_without_is_empty)] 6 | #![allow(clippy::needless_lifetimes)] 7 | 8 | use std::collections::HashMap; 9 | use std::io::Write; 10 | use std::iter::once; 11 | use std::sync::Arc; 12 | 13 | use crate::expr::{Literal, AST}; 14 | use crate::function::FunctionRegistry; 15 | use crate::function::{vectorize_2_arg, Function, FunctionSignature}; 16 | use crate::property::{FunctionProperty, ValueProperty}; 17 | use crate::runtime::Runtime; 18 | use crate::types::DataType; 19 | use crate::types::*; 20 | use crate::types::{ArgType, ArrayType}; 21 | use crate::values::{Column, ColumnBuilder, ValueRef}; 22 | use crate::values::{Scalar, Value}; 23 | 24 | pub mod display; 25 | pub mod expr; 26 | pub mod function; 27 | pub mod property; 28 | pub mod runtime; 29 | pub mod type_check; 30 | pub mod types; 31 | pub mod util; 32 | pub mod values; 33 | 34 | pub 
fn main() { 35 | run_cases(&mut std::io::stdout()); 36 | } 37 | 38 | #[test] 39 | pub fn test() { 40 | use goldenfile::Mint; 41 | 42 | let mut mint = Mint::new("tests"); 43 | let mut file = mint.new_goldenfile("run-output").unwrap(); 44 | run_cases(&mut file); 45 | } 46 | 47 | pub fn run_ast(output: &mut impl Write, ast: &AST, columns: HashMap) { 48 | writeln!(output, "ast: {ast}").unwrap(); 49 | let fn_registry = builtin_functions(); 50 | let (expr, ty, prop) = type_check::check(ast, &fn_registry).unwrap(); 51 | writeln!(output, "expr: {expr}").unwrap(); 52 | writeln!(output, "type: {ty}").unwrap(); 53 | writeln!(output, "property: {prop}").unwrap(); 54 | let runtime = Runtime { columns }; 55 | let result = runtime.run(&expr); 56 | writeln!(output, "result: {result}\n").unwrap(); 57 | } 58 | 59 | fn run_cases(output: &mut impl Write) { 60 | run_ast( 61 | output, 62 | &AST::FunctionCall { 63 | name: "and".to_string(), 64 | args: vec![ 65 | AST::Literal(Literal::Boolean(true)), 66 | AST::Literal(Literal::Boolean(false)), 67 | ], 68 | params: vec![], 69 | }, 70 | HashMap::new(), 71 | ); 72 | 73 | run_ast( 74 | output, 75 | &AST::FunctionCall { 76 | name: "and".to_string(), 77 | args: vec![ 78 | AST::Literal(Literal::Null), 79 | AST::Literal(Literal::Boolean(false)), 80 | ], 81 | params: vec![], 82 | }, 83 | HashMap::new(), 84 | ); 85 | 86 | run_ast( 87 | output, 88 | &AST::FunctionCall { 89 | name: "plus".to_string(), 90 | args: vec![ 91 | AST::ColumnRef { 92 | name: "a".to_string(), 93 | data_type: DataType::Nullable(Box::new(DataType::UInt8)), 94 | property: ValueProperty::default().not_null(false), 95 | }, 96 | AST::Literal(Literal::Int8(-10)), 97 | ], 98 | params: vec![], 99 | }, 100 | [( 101 | "a".to_string(), 102 | Column::Nullable { 103 | column: Box::new(Column::UInt8(vec![10, 11, 12].into())), 104 | validity: vec![false, true, false].into(), 105 | }, 106 | )] 107 | .into_iter() 108 | .collect(), 109 | ); 110 | 111 | run_ast( 112 | output, 113 | 
&AST::FunctionCall { 114 | name: "plus".to_string(), 115 | args: vec![ 116 | AST::ColumnRef { 117 | name: "a".to_string(), 118 | data_type: DataType::Nullable(Box::new(DataType::UInt8)), 119 | property: ValueProperty::default().not_null(false), 120 | }, 121 | AST::ColumnRef { 122 | name: "b".to_string(), 123 | data_type: DataType::Nullable(Box::new(DataType::UInt8)), 124 | property: ValueProperty::default().not_null(false), 125 | }, 126 | ], 127 | params: vec![], 128 | }, 129 | [ 130 | ( 131 | "a".to_string(), 132 | Column::Nullable { 133 | column: Box::new(Column::UInt8(vec![10, 11, 12].into())), 134 | validity: vec![false, true, false].into(), 135 | }, 136 | ), 137 | ( 138 | "b".to_string(), 139 | Column::Nullable { 140 | column: Box::new(Column::UInt8(vec![1, 2, 3].into())), 141 | validity: vec![false, true, true].into(), 142 | }, 143 | ), 144 | ] 145 | .into_iter() 146 | .collect(), 147 | ); 148 | 149 | run_ast( 150 | output, 151 | &AST::FunctionCall { 152 | name: "not".to_string(), 153 | args: vec![AST::ColumnRef { 154 | name: "a".to_string(), 155 | data_type: DataType::Nullable(Box::new(DataType::Boolean)), 156 | property: ValueProperty::default().not_null(false), 157 | }], 158 | params: vec![], 159 | }, 160 | [( 161 | "a".to_string(), 162 | Column::Nullable { 163 | column: Box::new(Column::Boolean(vec![true, false, true].into())), 164 | validity: vec![false, true, false].into(), 165 | }, 166 | )] 167 | .into_iter() 168 | .collect(), 169 | ); 170 | 171 | run_ast( 172 | output, 173 | &AST::FunctionCall { 174 | name: "least".to_string(), 175 | args: vec![ 176 | AST::Literal(Literal::UInt8(10)), 177 | AST::Literal(Literal::UInt8(20)), 178 | AST::Literal(Literal::UInt8(30)), 179 | AST::Literal(Literal::UInt8(40)), 180 | ], 181 | params: vec![], 182 | }, 183 | HashMap::new(), 184 | ); 185 | 186 | run_ast( 187 | output, 188 | &AST::FunctionCall { 189 | name: "create_tuple".to_string(), 190 | args: vec![ 191 | AST::Literal(Literal::Null), 192 | 
AST::Literal(Literal::Boolean(true)), 193 | ], 194 | params: vec![], 195 | }, 196 | [].into_iter().collect(), 197 | ); 198 | 199 | run_ast( 200 | output, 201 | &AST::FunctionCall { 202 | name: "get_tuple".to_string(), 203 | args: vec![AST::FunctionCall { 204 | name: "create_tuple".to_string(), 205 | args: vec![ 206 | AST::ColumnRef { 207 | name: "a".to_string(), 208 | data_type: DataType::Int16, 209 | property: ValueProperty::default().not_null(true), 210 | }, 211 | AST::ColumnRef { 212 | name: "b".to_string(), 213 | data_type: DataType::Nullable(Box::new(DataType::String)), 214 | property: ValueProperty::default().not_null(false), 215 | }, 216 | ], 217 | params: vec![], 218 | }], 219 | params: vec![1], 220 | }, 221 | [ 222 | ("a".to_string(), Column::Int16(vec![0, 1, 2, 3, 4].into())), 223 | ( 224 | "b".to_string(), 225 | Column::Nullable { 226 | column: Box::new(Column::String { 227 | data: "abcde".as_bytes().to_vec().into(), 228 | offsets: vec![0, 1, 2, 3, 4, 5], 229 | }), 230 | validity: vec![true, true, false, false, false].into(), 231 | }, 232 | ), 233 | ] 234 | .into_iter() 235 | .collect(), 236 | ); 237 | 238 | run_ast( 239 | output, 240 | &AST::FunctionCall { 241 | name: "get_tuple".to_string(), 242 | args: vec![AST::ColumnRef { 243 | name: "a".to_string(), 244 | data_type: DataType::Nullable(Box::new(DataType::Tuple(vec![ 245 | DataType::Boolean, 246 | DataType::String, 247 | ]))), 248 | property: ValueProperty::default().not_null(true), 249 | }], 250 | params: vec![1], 251 | }, 252 | [( 253 | "a".to_string(), 254 | Column::Nullable { 255 | column: Box::new(Column::Tuple { 256 | fields: vec![ 257 | Column::Boolean(vec![false; 5].into()), 258 | Column::String { 259 | data: "abcde".as_bytes().to_vec().into(), 260 | offsets: vec![0, 1, 2, 3, 4, 5], 261 | }, 262 | ], 263 | len: 5, 264 | }), 265 | validity: vec![true, true, false, false, false].into(), 266 | }, 267 | )] 268 | .into_iter() 269 | .collect(), 270 | ); 271 | 272 | run_ast( 273 | output, 274 | 
&AST::FunctionCall { 275 | name: "create_array".to_string(), 276 | args: vec![], 277 | params: vec![], 278 | }, 279 | [].into_iter().collect(), 280 | ); 281 | 282 | run_ast( 283 | output, 284 | &AST::FunctionCall { 285 | name: "create_array".to_string(), 286 | args: vec![ 287 | AST::Literal(Literal::Null), 288 | AST::Literal(Literal::Boolean(true)), 289 | ], 290 | params: vec![], 291 | }, 292 | [].into_iter().collect(), 293 | ); 294 | 295 | run_ast( 296 | output, 297 | &AST::FunctionCall { 298 | name: "create_array".to_string(), 299 | args: vec![ 300 | AST::ColumnRef { 301 | name: "a".to_string(), 302 | data_type: DataType::Int16, 303 | property: ValueProperty::default().not_null(true), 304 | }, 305 | AST::ColumnRef { 306 | name: "b".to_string(), 307 | data_type: DataType::Int16, 308 | property: ValueProperty::default().not_null(true), 309 | }, 310 | ], 311 | params: vec![], 312 | }, 313 | [ 314 | ("a".to_string(), Column::Int16(vec![0, 1, 2, 3, 4].into())), 315 | ("b".to_string(), Column::Int16(vec![5, 6, 7, 8, 9].into())), 316 | ] 317 | .into_iter() 318 | .collect(), 319 | ); 320 | 321 | run_ast( 322 | output, 323 | &AST::FunctionCall { 324 | name: "create_array".to_string(), 325 | args: vec![ 326 | AST::FunctionCall { 327 | name: "create_array".to_string(), 328 | args: vec![ 329 | AST::ColumnRef { 330 | name: "a".to_string(), 331 | data_type: DataType::Int16, 332 | property: ValueProperty::default().not_null(true), 333 | }, 334 | AST::ColumnRef { 335 | name: "b".to_string(), 336 | data_type: DataType::Int16, 337 | property: ValueProperty::default().not_null(true), 338 | }, 339 | ], 340 | params: vec![], 341 | }, 342 | AST::Literal(Literal::Null), 343 | AST::Literal(Literal::Null), 344 | ], 345 | params: vec![], 346 | }, 347 | [ 348 | ("a".to_string(), Column::Int16(vec![0, 1, 2, 3, 4].into())), 349 | ("b".to_string(), Column::Int16(vec![5, 6, 7, 8, 9].into())), 350 | ] 351 | .into_iter() 352 | .collect(), 353 | ); 354 | 355 | run_ast( 356 | output, 357 | 
&AST::FunctionCall { 358 | name: "get".to_string(), 359 | args: vec![ 360 | AST::ColumnRef { 361 | name: "array".to_string(), 362 | data_type: DataType::Array(Box::new(DataType::Int16)), 363 | property: ValueProperty::default().not_null(true), 364 | }, 365 | AST::ColumnRef { 366 | name: "idx".to_string(), 367 | data_type: DataType::UInt8, 368 | property: ValueProperty::default().not_null(true), 369 | }, 370 | ], 371 | params: vec![], 372 | }, 373 | [ 374 | ( 375 | "array".to_string(), 376 | Column::Array { 377 | array: Box::new(Column::Int16((0..100).collect())), 378 | offsets: vec![0, 20, 40, 60, 80, 100], 379 | }, 380 | ), 381 | ("idx".to_string(), Column::UInt8(vec![0, 1, 2, 3, 4].into())), 382 | ] 383 | .into_iter() 384 | .collect(), 385 | ); 386 | 387 | run_ast( 388 | output, 389 | &AST::FunctionCall { 390 | name: "get".to_string(), 391 | args: vec![ 392 | AST::ColumnRef { 393 | name: "array".to_string(), 394 | data_type: DataType::Array(Box::new(DataType::Array(Box::new( 395 | DataType::Int16, 396 | )))), 397 | property: ValueProperty::default().not_null(true), 398 | }, 399 | AST::ColumnRef { 400 | name: "idx".to_string(), 401 | data_type: DataType::UInt8, 402 | property: ValueProperty::default().not_null(true), 403 | }, 404 | ], 405 | params: vec![], 406 | }, 407 | [ 408 | ( 409 | "array".to_string(), 410 | Column::Array { 411 | array: Box::new(Column::Array { 412 | array: Box::new(Column::Int16((0..100).collect())), 413 | offsets: vec![ 414 | 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 415 | 90, 100, 416 | ], 417 | }), 418 | offsets: vec![0, 4, 8, 12, 16, 20], 419 | }, 420 | ), 421 | ("idx".to_string(), Column::UInt8(vec![0, 1, 2].into())), 422 | ] 423 | .into_iter() 424 | .collect(), 425 | ); 426 | } 427 | 428 | fn builtin_functions() -> FunctionRegistry { 429 | let mut registry = FunctionRegistry::default(); 430 | 431 | registry.register_2_arg::( 432 | "and", 433 | FunctionProperty::default(), 434 | |lhs, rhs| lhs && rhs, 435 | 
); 436 | 437 | registry.register_2_arg::, NumberType, NumberType, _>( 438 | "plus", 439 | FunctionProperty::default(), 440 | |lhs, rhs| lhs + rhs, 441 | ); 442 | 443 | registry.register_1_arg::( 444 | "not", 445 | FunctionProperty::default(), 446 | |val| !val, 447 | ); 448 | 449 | registry.register_function_factory("least", |_, args_type| { 450 | Some(Arc::new(Function { 451 | signature: FunctionSignature { 452 | name: "least", 453 | args_type: vec![DataType::Int16; args_type.len()], 454 | return_type: DataType::Int16, 455 | property: FunctionProperty::default().preserve_not_null(true), 456 | }, 457 | eval: Box::new(|args, generics| { 458 | if args.is_empty() { 459 | Value::Scalar(Scalar::Int16(0)) 460 | } else if args.len() == 1 { 461 | args[0].clone().to_owned() 462 | } else { 463 | let mut min: Value> = vectorize_2_arg( 464 | NumberType::::try_downcast_value(&args[0]).unwrap(), 465 | NumberType::::try_downcast_value(&args[1]).unwrap(), 466 | generics, 467 | |lhs, rhs| lhs.min(rhs), 468 | ); 469 | for arg in &args[2..] 
{ 470 | min = vectorize_2_arg( 471 | min.as_ref(), 472 | NumberType::::try_downcast_value(arg).unwrap(), 473 | generics, 474 | |lhs, rhs| lhs.min(rhs), 475 | ); 476 | } 477 | NumberType::::upcast_value(min) 478 | } 479 | }), 480 | })) 481 | }); 482 | 483 | registry.register_0_arg_core::( 484 | "create_array", 485 | FunctionProperty::default(), 486 | |_| Value::Scalar(()), 487 | ); 488 | 489 | registry.register_function_factory("create_array", |_, args_type| { 490 | Some(Arc::new(Function { 491 | signature: FunctionSignature { 492 | name: "create_array", 493 | args_type: vec![DataType::Generic(0); args_type.len()], 494 | return_type: DataType::Array(Box::new(DataType::Generic(0))), 495 | property: FunctionProperty::default().preserve_not_null(true), 496 | }, 497 | eval: Box::new(|args, generics| { 498 | let len = args.iter().find_map(|arg| match arg { 499 | ValueRef::Column(col) => Some(col.len()), 500 | _ => None, 501 | }); 502 | if let Some(len) = len { 503 | let mut array_builder = ColumnBuilder::with_capacity(&generics[0], 0); 504 | for idx in 0..len { 505 | for arg in args { 506 | match arg { 507 | ValueRef::Scalar(scalar) => { 508 | array_builder.push(scalar.as_ref()); 509 | } 510 | ValueRef::Column(col) => { 511 | array_builder.push(col.index(idx)); 512 | } 513 | } 514 | } 515 | } 516 | let offsets = once(0) 517 | .chain((0..len).map(|row| args.len() * (row + 1))) 518 | .collect(); 519 | Value::Column(Column::Array { 520 | array: Box::new(array_builder.build()), 521 | offsets, 522 | }) 523 | } else { 524 | // All args are scalars, so we return a scalar as result 525 | let mut array = ColumnBuilder::with_capacity(&generics[0], 0); 526 | for arg in args { 527 | match arg { 528 | ValueRef::Scalar(scalar) => { 529 | array.push(scalar.as_ref()); 530 | } 531 | ValueRef::Column(_) => unreachable!(), 532 | } 533 | } 534 | Value::Scalar(Scalar::Array(array.build())) 535 | } 536 | }), 537 | })) 538 | }); 539 | 540 | registry.register_with_writer_2_arg::>, NumberType, 
GenericType<0>, _>( 541 | "get", 542 | FunctionProperty::default(), 543 | |array, idx, output| output.push(array.index(idx as usize)), 544 | ); 545 | 546 | registry.register_function_factory("create_tuple", |_, args_type| { 547 | Some(Arc::new(Function { 548 | signature: FunctionSignature { 549 | name: "create_tuple", 550 | args_type: args_type.to_vec(), 551 | return_type: DataType::Tuple(args_type.to_vec()), 552 | property: FunctionProperty::default().preserve_not_null(true), 553 | }, 554 | eval: Box::new(move |args, _generics| { 555 | let len = args.iter().find_map(|arg| match arg { 556 | ValueRef::Column(col) => Some(col.len()), 557 | _ => None, 558 | }); 559 | if let Some(len) = len { 560 | let fields = args 561 | .iter() 562 | .map(|arg| match arg { 563 | ValueRef::Scalar(scalar) => scalar.as_ref().repeat(len).build(), 564 | ValueRef::Column(col) => col.clone(), 565 | }) 566 | .collect(); 567 | Value::Column(Column::Tuple { fields, len }) 568 | } else { 569 | // All args are scalars, so we return a scalar as result 570 | let fields = args 571 | .iter() 572 | .map(|arg| match arg { 573 | ValueRef::Scalar(scalar) => (*scalar).to_owned(), 574 | ValueRef::Column(_) => unreachable!(), 575 | }) 576 | .collect(); 577 | Value::Scalar(Scalar::Tuple(fields)) 578 | } 579 | }), 580 | })) 581 | }); 582 | 583 | registry.register_function_factory("get_tuple", |params, args_type| { 584 | let idx = *params.get(0)?; 585 | let tuple_tys = match args_type.get(0) { 586 | Some(DataType::Tuple(tys)) => tys, 587 | _ => return None, 588 | }; 589 | if idx >= tuple_tys.len() { 590 | return None; 591 | } 592 | 593 | Some(Arc::new(Function { 594 | signature: FunctionSignature { 595 | name: "get_tuple", 596 | args_type: vec![DataType::Tuple(tuple_tys.to_vec())], 597 | return_type: tuple_tys[idx].clone(), 598 | property: FunctionProperty::default().preserve_not_null(true), 599 | }, 600 | eval: Box::new(move |args, _| match &args[0] { 601 | ValueRef::Scalar(Scalar::Tuple(fields)) => 
Value::Scalar(fields[idx].to_owned()), 602 | ValueRef::Column(Column::Tuple { fields, .. }) => { 603 | Value::Column(fields[idx].to_owned()) 604 | } 605 | _ => unreachable!(), 606 | }), 607 | })) 608 | }); 609 | 610 | registry.register_function_factory("get_tuple", |params, args_type| { 611 | let idx = *params.get(0)?; 612 | let tuple_tys = match args_type.get(0) { 613 | Some(DataType::Nullable(box DataType::Tuple(tys))) => tys, 614 | _ => return None, 615 | }; 616 | if idx >= tuple_tys.len() { 617 | return None; 618 | } 619 | 620 | Some(Arc::new(Function { 621 | signature: FunctionSignature { 622 | name: "get_tuple", 623 | args_type: vec![DataType::Nullable(Box::new(DataType::Tuple( 624 | tuple_tys.to_vec(), 625 | )))], 626 | return_type: DataType::Nullable(Box::new(tuple_tys[idx].clone())), 627 | property: FunctionProperty::default().preserve_not_null(true), 628 | }, 629 | eval: Box::new(move |args, _| match &args[0] { 630 | ValueRef::Scalar(Scalar::Null) => Value::Scalar(Scalar::Null), 631 | ValueRef::Scalar(Scalar::Tuple(fields)) => Value::Scalar(fields[idx].to_owned()), 632 | ValueRef::Column(Column::Nullable { 633 | column: box Column::Tuple { fields, .. 
}, 634 | validity, 635 | }) => Value::Column(Column::Nullable { 636 | column: Box::new(fields[idx].to_owned()), 637 | validity: validity.clone(), 638 | }), 639 | _ => unreachable!(), 640 | }), 641 | })) 642 | }); 643 | 644 | registry 645 | } 646 | -------------------------------------------------------------------------------- /src/function.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, sync::Arc}; 2 | 3 | use educe::Educe; 4 | 5 | use crate::{ 6 | property::FunctionProperty, 7 | types::*, 8 | values::{Value, ValueRef}, 9 | }; 10 | 11 | #[derive(Debug, Clone)] 12 | pub struct FunctionSignature { 13 | pub name: &'static str, 14 | pub args_type: Vec, 15 | pub return_type: DataType, 16 | pub property: FunctionProperty, 17 | } 18 | 19 | /// `FunctionID` is a unique identifier for a function. It's used to construct 20 | /// exactly the same function from the remote execution nodes. 21 | #[derive(Debug, Clone)] 22 | pub enum FunctionID { 23 | Builtin { 24 | name: &'static str, 25 | id: usize, 26 | }, 27 | Factory { 28 | name: &'static str, 29 | id: usize, 30 | params: Vec, 31 | args_type: Vec, 32 | }, 33 | } 34 | 35 | #[derive(Educe)] 36 | #[educe(Debug)] 37 | pub struct Function { 38 | pub signature: FunctionSignature, 39 | #[educe(Debug(ignore))] 40 | #[allow(clippy::type_complexity)] 41 | pub eval: Box], &GenericMap) -> Value>, 42 | } 43 | 44 | #[derive(Default)] 45 | pub struct FunctionRegistry { 46 | pub funcs: HashMap<&'static str, Vec>>, 47 | /// A function to build a function depending on the const parameters and the type of arguments (before coercion). 48 | /// 49 | /// The first argument is the const parameters and the second argument is the types of the arguments.
// Tail of `FunctionRegistry`; the struct header and its earlier fields precede this chunk.
    /// Function factories keyed by function name. A factory receives the call's
    /// `params` and argument types and may return a function built for them.
    #[allow(clippy::type_complexity)]
    pub factories: HashMap<
        &'static str,
        Vec<Box<dyn Fn(&[usize], &[DataType]) -> Option<Arc<Function>> + 'static>>,
    >,
}

// NOTE(review): the generic argument lists in this block were stripped by the
// extraction that produced this dump and have been reconstructed from the
// surviving fragments; confirm them against the original `src/function.rs`.
impl FunctionRegistry {
    /// Returns every function registered under `name` that may serve a call
    /// with the given `params` and `args_type`.
    ///
    /// When `params` is empty, the built-in overloads under `name` whose arity
    /// equals `args_type.len()` are returned, provided there is at least one.
    /// Otherwise every factory under `name` is invoked with
    /// `(params, args_type)` and the functions it yields are returned.
    pub fn search_candidates(
        &self,
        name: &str,
        params: &[usize],
        args_type: &[DataType],
    ) -> Vec<(FunctionID, Arc<Function>)> {
        if params.is_empty() {
            let builtin_funcs = self
                .funcs
                .get_key_value(name)
                .map(|(name, funcs)| {
                    funcs
                        .iter()
                        .enumerate()
                        .filter_map(|(id, func)| {
                            if func.signature.name == *name
                                && func.signature.args_type.len() == args_type.len()
                            {
                                Some((FunctionID::Builtin { name, id }, func.clone()))
                            } else {
                                None
                            }
                        })
                        .collect::<Vec<_>>()
                })
                .unwrap_or_default();

            // Built-ins take precedence over factories.
            if !builtin_funcs.is_empty() {
                return builtin_funcs;
            }
        }

        self.factories
            .get_key_value(name)
            .map(|(name, factories)| {
                factories
                    .iter()
                    .enumerate()
                    .filter_map(|(id, factory)| {
                        factory(params, args_type).map(|func| {
                            (
                                FunctionID::Factory {
                                    name,
                                    id,
                                    params: params.to_vec(),
                                    args_type: args_type.to_vec(),
                                },
                                func,
                            )
                        })
                    })
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default()
    }

    /// Registers a zero-argument overload of `name` returning `O`.
    ///
    /// `func` receives only the generic map and produces the whole value.
    pub fn register_0_arg_core<O: ArgType, F>(
        &mut self,
        name: &'static str,
        property: FunctionProperty,
        func: F,
    ) where
        F: Fn(&GenericMap) -> Value<O> + 'static + Clone + Copy,
    {
        self.funcs.entry(name).or_default().push(Arc::new(Function {
            signature: FunctionSignature {
                name,
                args_type: vec![],
                return_type: O::data_type(),
                property,
            },
            eval: Box::new(erase_function_generic_0_arg(func)),
        }));
    }

    /// Registers a scalar-to-scalar function of one argument and derives its
    /// null-aware overloads.
    ///
    /// Three overloads are pushed under `name`, all with `preserve_not_null`
    /// forced to `true`: an all-null overload that returns `Value::Scalar(())`,
    /// the `(I1) -> O` overload vectorized with [`vectorize_1_arg`], and a
    /// nullable overload built with [`vectorize_passthrough_nullable_1_arg`].
    ///
    /// # Panics
    ///
    /// Panics if `I1` or `O` is itself a nullable type.
    pub fn register_1_arg<I1: ArgType, O: ArgType, F>(
        &mut self,
        name: &'static str,
        property: FunctionProperty,
        func: F,
    ) where
        F: for<'a> Fn(I1::ScalarRef<'a>) -> O::Scalar + 'static + Clone + Copy,
    {
        let has_nullable = [I1::data_type(), O::data_type()]
            .iter()
            .any(|ty| ty.as_nullable().is_some());

        assert!(
            !has_nullable,
            "Function {} has nullable argument or output, please use register_1_arg_core instead",
            name
        );

        let property = property.preserve_not_null(true);

        self.register_1_arg_core::<NullType, NullType, _>(name, property, move |_, _| {
            Value::Scalar(())
        });

        self.register_1_arg_core::<I1, O, _>(name, property, move |val, generics| {
            vectorize_1_arg(val, generics, func)
        });

        self.register_1_arg_core::<NullableType<I1>, NullableType<O>, _>(
            name,
            property,
            move |val, generics| vectorize_passthrough_nullable_1_arg(val, generics, func),
        );
    }

    /// Same as [`Self::register_1_arg`], but `func` writes its result into an
    /// output column builder instead of returning a scalar.
    ///
    /// # Panics
    ///
    /// Panics if `I1` or `O` is itself a nullable type.
    pub fn register_with_writer_1_arg<I1: ArgType, O: ArgType, F>(
        &mut self,
        name: &'static str,
        property: FunctionProperty,
        func: F,
    ) where
        F: for<'a> Fn(I1::ScalarRef<'a>, &mut O::ColumnBuilder) + 'static + Clone + Copy,
    {
        let has_nullable = [I1::data_type(), O::data_type()]
            .iter()
            .any(|ty| ty.as_nullable().is_some());

        assert!(
            !has_nullable,
            "Function {} has nullable argument or output, please use register_1_arg_core instead",
            name
        );

        let property = property.preserve_not_null(true);

        self.register_1_arg_core::<NullType, NullType, _>(name, property, move |_, _| {
            Value::Scalar(())
        });

        self.register_1_arg_core::<I1, O, _>(name, property, move |val, generics| {
            vectorize_with_writer_1_arg(val, generics, func)
        });

        self.register_1_arg_core::<NullableType<I1>, NullableType<O>, _>(
            name,
            property,
            move |val, generics| {
                vectorize_with_writer_passthrough_nullable_1_arg(val, generics, func)
            },
        );
    }

    /// Registers a single one-argument overload `(I1) -> O` of `name`.
    ///
    /// `func` works on whole values (scalar or column); no null handling or
    /// vectorization is added on its behalf.
    pub fn register_1_arg_core<I1: ArgType, O: ArgType, F>(
        &mut self,
        name: &'static str,
        property: FunctionProperty,
        func: F,
    ) where
        F: Fn(ValueRef<I1>, &GenericMap) -> Value<O> + 'static + Clone + Copy,
    {
        self.funcs.entry(name).or_default().push(Arc::new(Function {
            signature: FunctionSignature {
                name,
                args_type: vec![I1::data_type()],
                return_type: O::data_type(),
                property,
            },
            eval: Box::new(erase_function_generic_1_arg(func)),
        }));
    }

    /// Registers a scalar function of two arguments and derives its null-aware
    /// overloads.
    ///
    /// Five overloads are pushed under `name`, all with `preserve_not_null`
    /// forced to `true`: three overloads that return `Value::Scalar(())`, the
    /// `(I1, I2) -> O` overload vectorized with [`vectorize_2_arg`], and a
    /// nullable overload built with [`vectorize_passthrough_nullable_2_arg`].
    ///
    /// # Panics
    ///
    /// Panics if `I1`, `I2` or `O` is itself a nullable type.
    pub fn register_2_arg<I1: ArgType, I2: ArgType, O: ArgType, F>(
        &mut self,
        name: &'static str,
        property: FunctionProperty,
        func: F,
    ) where
        F: for<'a, 'b> Fn(I1::ScalarRef<'a>, I2::ScalarRef<'b>) -> O::Scalar
            + Sized
            + 'static
            + Clone
            + Copy,
    {
        let has_nullable = [I1::data_type(), I2::data_type(), O::data_type()]
            .iter()
            .any(|ty| ty.as_nullable().is_some());

        assert!(
            !has_nullable,
            "Function {} has nullable argument or output, please use register_2_arg_core instead",
            name
        );

        let property = property.preserve_not_null(true);

        // NOTE(review): the type arguments of the next two calls were lost in
        // extraction; `<I1, NullType, ..>` followed by `<NullType, I2, ..>` is
        // a reconstruction. Confirm the order, since it fixes the overload ids.
        self.register_2_arg_core::<I1, NullType, NullType, _>(name, property, move |_, _, _| {
            Value::Scalar(())
        });
        self.register_2_arg_core::<NullType, I2, NullType, _>(name, property, move |_, _, _| {
            Value::Scalar(())
        });
        self.register_2_arg_core::<NullType, NullType, NullType, _>(
            name,
            property,
            move |_, _, _| Value::Scalar(()),
        );

        self.register_2_arg_core::<I1, I2, O, _>(name, property, move |lhs, rhs, generics| {
            vectorize_2_arg(lhs, rhs, generics, func)
        });

        self.register_2_arg_core::<NullableType<I1>, NullableType<I2>, NullableType<O>, _>(
            name,
            property,
            move |lhs, rhs, generics| {
                vectorize_passthrough_nullable_2_arg(lhs, rhs, generics, func)
            },
        );
    }

    /// Same as [`Self::register_2_arg`], but `func` writes its result into an
    /// output column builder instead of returning a scalar.
    ///
    /// # Panics
    ///
    /// Panics if `I1`, `I2` or `O` is itself a nullable type.
    pub fn register_with_writer_2_arg<I1: ArgType, I2: ArgType, O: ArgType, F>(
        &mut self,
        name: &'static str,
        property: FunctionProperty,
        func: F,
    ) where
        F: for<'a, 'b> Fn(I1::ScalarRef<'a>, I2::ScalarRef<'b>, &mut O::ColumnBuilder)
            + Sized
            + 'static
            + Clone
            + Copy,
    {
        let has_nullable = [I1::data_type(), I2::data_type(), O::data_type()]
            .iter()
            .any(|ty| ty.as_nullable().is_some());

        assert!(
            !has_nullable,
            "Function {} has nullable argument or output, please use register_2_arg_core instead",
            name
        );

        let property = property.preserve_not_null(true);

        // NOTE(review): reconstructed type arguments, see `register_2_arg`.
        self.register_2_arg_core::<I1, NullType, NullType, _>(name, property, move |_, _, _| {
            Value::Scalar(())
        });
        self.register_2_arg_core::<NullType, I2, NullType, _>(name, property, move |_, _, _| {
            Value::Scalar(())
        });
        self.register_2_arg_core::<NullType, NullType, NullType, _>(
            name,
            property,
            move |_, _, _| Value::Scalar(()),
        );

        self.register_2_arg_core::<I1, I2, O, _>(name, property, move |lhs, rhs, generics| {
            vectorize_with_writer_2_arg(lhs, rhs, generics, func)
        });

        self.register_2_arg_core::<NullableType<I1>, NullableType<I2>, NullableType<O>, _>(
            name,
            property,
            move |lhs, rhs, generics| {
                vectorize_with_writer_passthrough_nullable_2_arg(lhs, rhs, generics, func)
            },
        );
    }

    /// Registers a single two-argument overload `(I1, I2) -> O` of `name`.
    ///
    /// `func` works on whole values (scalar or column); no null handling or
    /// vectorization is added on its behalf.
    pub fn register_2_arg_core<I1: ArgType, I2: ArgType, O: ArgType, F>(
        &mut self,
        name: &'static str,
        property: FunctionProperty,
        func: F,
    ) where
        F: for<'a> Fn(ValueRef<'a, I1>, ValueRef<'a, I2>, &GenericMap) -> Value<O>
            + Sized
            + 'static
            + Clone
            + Copy,
    {
        self.funcs.entry(name).or_default().push(Arc::new(Function {
            signature: FunctionSignature {
                name,
                args_type: vec![I1::data_type(), I2::data_type()],
                return_type: O::data_type(),
                property,
            },
            eval: Box::new(erase_function_generic_2_arg(func)),
        }));
    }

    /// Adds `factory` to the list of factories consulted for `name` by
    /// [`Self::search_candidates`].
    pub fn register_function_factory(
        &mut self,
        name: &'static str,
        factory: impl Fn(&[usize], &[DataType]) -> Option<Arc<Function>> + 'static,
    ) {
        self.factories
            .entry(name)
            .or_default()
            .push(Box::new(factory));
    }
}

/// Wraps a typed zero-argument function into the untyped evaluation signature.
///
/// The argument slice is ignored; the typed result is upcast to `AnyType`.
fn erase_function_generic_0_arg<O: ArgType>(
    func: impl Fn(&GenericMap) -> Value<O>,
) -> impl Fn(&[ValueRef<AnyType>], &GenericMap) -> Value<AnyType> {
    move |_args, generics| {
        let result = func(generics);

        match result {
            Value::Scalar(scalar) => Value::Scalar(O::upcast_scalar(scalar)),
            Value::Column(col) => Value::Column(O::upcast_column(col)),
        }
    }
}

/// Wraps a typed one-argument function into the untyped evaluation signature.
///
/// The returned closure panics if `args` is empty or if `args[0]` cannot be
/// downcast to `I1`.
fn erase_function_generic_1_arg<I1: ArgType, O: ArgType>(
    func: impl for<'a> Fn(ValueRef<'a, I1>, &GenericMap) -> Value<O>,
) -> impl Fn(&[ValueRef<AnyType>], &GenericMap) -> Value<AnyType> {
    move |args, generics| {
        let arg1 = match &args[0] {
            ValueRef::Scalar(scalar) => ValueRef::Scalar(
                I1::try_downcast_scalar(scalar)
                    .expect("argument 1 does not match the registered signature"),
            ),
            ValueRef::Column(col) => ValueRef::Column(
                I1::try_downcast_column(col)
                    .expect("argument 1 does not match the registered signature"),
            ),
        };

        let result = func(arg1, generics);

        match result {
            Value::Scalar(scalar) => Value::Scalar(O::upcast_scalar(scalar)),
            Value::Column(col) => Value::Column(O::upcast_column(col)),
        }
    }
}

/// Wraps a typed two-argument function into the untyped evaluation signature.
///
/// The returned closure panics if `args` holds fewer than two values or if
/// either value cannot be downcast to its declared type.
fn erase_function_generic_2_arg<I1: ArgType, I2: ArgType, O: ArgType>(
    func: impl for<'a> Fn(ValueRef<'a, I1>, ValueRef<'a, I2>, &GenericMap) -> Value<O>,
) -> impl Fn(&[ValueRef<AnyType>], &GenericMap) -> Value<AnyType> {
    move |args, generics| {
        let arg1 = match &args[0] {
            ValueRef::Scalar(scalar) => ValueRef::Scalar(
                I1::try_downcast_scalar(scalar)
                    .expect("argument 1 does not match the registered signature"),
            ),
            ValueRef::Column(col) => ValueRef::Column(
                I1::try_downcast_column(col)
                    .expect("argument 1 does not match the registered signature"),
            ),
        };
        let arg2 = match &args[1] {
            ValueRef::Scalar(scalar) => ValueRef::Scalar(
                I2::try_downcast_scalar(scalar)
                    .expect("argument 2 does not match the registered signature"),
            ),
            ValueRef::Column(col) => ValueRef::Column(
                I2::try_downcast_column(col)
                    .expect("argument 2 does not match the registered signature"),
            ),
        };

        let result = func(arg1, arg2, generics);

        match result {
            Value::Scalar(scalar) => Value::Scalar(O::upcast_scalar(scalar)),
            Value::Column(col) => Value::Column(O::upcast_column(col)),
        }
    }
}

| pub fn vectorize_1_arg<'a, I1: ArgType, O: ArgType>( 420 | val: ValueRef<'a, I1>, 421 | generics: &GenericMap, 422 | func: impl Fn(I1::ScalarRef<'_>) -> O::Scalar, 423 | ) -> Value { 424 | match val { 425 | ValueRef::Scalar(val) => Value::Scalar(func(val)), 426 | ValueRef::Column(col) => { 427 | let iter = I1::iter_column(&col).map(func); 428 | let col = O::column_from_iter(iter, generics); 429 | Value::Column(col) 430 | } 431 | } 432 | } 433 | 434 | pub fn vectorize_with_writer_1_arg<'a, I1: ArgType, O: ArgType>( 435 | val: ValueRef<'a, I1>, 436 | generics: &GenericMap, 437 | func: impl Fn(I1::ScalarRef<'_>, &mut O::ColumnBuilder), 438 | ) -> Value { 439 | match val { 440 | ValueRef::Scalar(val) => { 441 | let mut builder = O::create_builder(1, generics); 442 | func(val, &mut builder); 443 | Value::Scalar(O::build_scalar(builder)) 444 | } 445 | ValueRef::Column(col) => { 446 | let iter = I1::iter_column(&col); 447 | let mut builder = O::create_builder(iter.size_hint().0, generics); 448 | for val in I1::iter_column(&col) { 449 | func(val, &mut builder); 450 | } 451 | Value::Column(O::build_column(builder)) 452 | } 453 | } 454 | } 455 | 456 | pub fn vectorize_passthrough_nullable_1_arg<'a, I1: ArgType, O: ArgType>( 457 | val: ValueRef<'a, NullableType>, 458 | generics: &GenericMap, 459 | func: impl for<'for_a> Fn(I1::ScalarRef<'for_a>) -> O::Scalar, 460 | ) -> Value> { 461 | match val { 462 | ValueRef::Scalar(None) => Value::Scalar(None), 463 | ValueRef::Scalar(Some(val)) => Value::Scalar(Some(func(val))), 464 | ValueRef::Column((col, validity)) => { 465 | let iter = I1::iter_column(&col).map(func); 466 | let col = O::column_from_iter(iter, generics); 467 | Value::Column((col, validity)) 468 | } 469 | } 470 | } 471 | 472 | pub fn vectorize_with_writer_passthrough_nullable_1_arg<'a, I1: ArgType, O: ArgType>( 473 | val: ValueRef<'a, NullableType>, 474 | generics: &GenericMap, 475 | func: impl Fn(I1::ScalarRef<'_>, &mut O::ColumnBuilder), 476 | ) -> Value> { 477 | 
match val { 478 | ValueRef::Scalar(None) => Value::Scalar(None), 479 | ValueRef::Scalar(Some(val)) => { 480 | let mut builder = O::create_builder(1, generics); 481 | func(val, &mut builder); 482 | Value::Scalar(Some(O::build_scalar(builder))) 483 | } 484 | ValueRef::Column((col, validity)) => { 485 | let iter = I1::iter_column(&col); 486 | let mut builder = O::create_builder(iter.size_hint().0, generics); 487 | for val in I1::iter_column(&col) { 488 | func(val, &mut builder); 489 | } 490 | Value::Column((O::build_column(builder), validity)) 491 | } 492 | } 493 | } 494 | 495 | pub fn vectorize_2_arg<'a, 'b, I1: ArgType, I2: ArgType, O: ArgType>( 496 | lhs: ValueRef<'a, I1>, 497 | rhs: ValueRef<'b, I2>, 498 | generics: &GenericMap, 499 | func: impl Fn(I1::ScalarRef<'_>, I2::ScalarRef<'_>) -> O::Scalar, 500 | ) -> Value { 501 | match (lhs, rhs) { 502 | (ValueRef::Scalar(lhs), ValueRef::Scalar(rhs)) => Value::Scalar(func(lhs, rhs)), 503 | (ValueRef::Scalar(lhs), ValueRef::Column(rhs)) => { 504 | let iter = I2::iter_column(&rhs).map(|rhs| func(lhs.clone(), rhs)); 505 | let col = O::column_from_iter(iter, generics); 506 | Value::Column(col) 507 | } 508 | (ValueRef::Column(lhs), ValueRef::Scalar(rhs)) => { 509 | let iter = I1::iter_column(&lhs).map(|lhs| func(lhs, rhs.clone())); 510 | let col = O::column_from_iter(iter, generics); 511 | Value::Column(col) 512 | } 513 | (ValueRef::Column(lhs), ValueRef::Column(rhs)) => { 514 | let iter = I1::iter_column(&lhs) 515 | .zip(I2::iter_column(&rhs)) 516 | .map(|(lhs, rhs)| func(lhs, rhs)); 517 | let col = O::column_from_iter(iter, generics); 518 | Value::Column(col) 519 | } 520 | } 521 | } 522 | 523 | pub fn vectorize_with_writer_2_arg<'a, 'b, I1: ArgType, I2: ArgType, O: ArgType>( 524 | lhs: ValueRef<'a, I1>, 525 | rhs: ValueRef<'b, I2>, 526 | generics: &GenericMap, 527 | func: impl Fn(I1::ScalarRef<'_>, I2::ScalarRef<'_>, &mut O::ColumnBuilder), 528 | ) -> Value { 529 | match (lhs, rhs) { 530 | (ValueRef::Scalar(lhs), 
ValueRef::Scalar(rhs)) => { 531 | let mut builder = O::create_builder(1, generics); 532 | func(lhs, rhs, &mut builder); 533 | Value::Scalar(O::build_scalar(builder)) 534 | } 535 | (ValueRef::Scalar(lhs), ValueRef::Column(rhs)) => { 536 | let iter = I2::iter_column(&rhs); 537 | let mut builder = O::create_builder(iter.size_hint().0, generics); 538 | for rhs in iter { 539 | func(lhs.clone(), rhs, &mut builder); 540 | } 541 | Value::Column(O::build_column(builder)) 542 | } 543 | (ValueRef::Column(lhs), ValueRef::Scalar(rhs)) => { 544 | let iter = I1::iter_column(&lhs); 545 | let mut builder = O::create_builder(iter.size_hint().0, generics); 546 | for lhs in iter { 547 | func(lhs, rhs.clone(), &mut builder); 548 | } 549 | Value::Column(O::build_column(builder)) 550 | } 551 | (ValueRef::Column(lhs), ValueRef::Column(rhs)) => { 552 | let iter = I1::iter_column(&lhs).zip(I2::iter_column(&rhs)); 553 | let mut builder = O::create_builder(iter.size_hint().0, generics); 554 | for (lhs, rhs) in iter { 555 | func(lhs, rhs, &mut builder); 556 | } 557 | Value::Column(O::build_column(builder)) 558 | } 559 | } 560 | } 561 | 562 | pub fn vectorize_passthrough_nullable_2_arg<'a, 'b, I1: ArgType, I2: ArgType, O: ArgType>( 563 | lhs: ValueRef<'a, NullableType>, 564 | rhs: ValueRef<'b, NullableType>, 565 | generics: &GenericMap, 566 | func: impl Fn(I1::ScalarRef<'_>, I2::ScalarRef<'_>) -> O::Scalar, 567 | ) -> Value> { 568 | match (lhs, rhs) { 569 | (ValueRef::Scalar(None), _) | (_, ValueRef::Scalar(None)) => Value::Scalar(None), 570 | (ValueRef::Scalar(Some(lhs)), ValueRef::Scalar(Some(rhs))) => { 571 | Value::Scalar(Some(func(lhs, rhs))) 572 | } 573 | (ValueRef::Scalar(Some(lhs)), ValueRef::Column((rhs, rhs_validity))) => { 574 | let iter = I2::iter_column(&rhs).map(|rhs| func(lhs.clone(), rhs)); 575 | let col = O::column_from_iter(iter, generics); 576 | Value::Column((col, rhs_validity)) 577 | } 578 | (ValueRef::Column((lhs, lhs_validity)), ValueRef::Scalar(Some(rhs))) => { 579 | let 
iter = I1::iter_column(&lhs).map(|lhs| func(lhs, rhs.clone())); 580 | let col = O::column_from_iter(iter, generics); 581 | Value::Column((col, lhs_validity)) 582 | } 583 | (ValueRef::Column((lhs, lhs_validity)), ValueRef::Column((rhs, rhs_validity))) => { 584 | let iter = I1::iter_column(&lhs) 585 | .zip(I2::iter_column(&rhs)) 586 | .map(|(lhs, rhs)| func(lhs, rhs)); 587 | let col = O::column_from_iter(iter, generics); 588 | let validity = arrow2::bitmap::or(&lhs_validity, &rhs_validity); 589 | Value::Column((col, validity)) 590 | } 591 | } 592 | } 593 | 594 | pub fn vectorize_with_writer_passthrough_nullable_2_arg< 595 | 'a, 596 | 'b, 597 | I1: ArgType, 598 | I2: ArgType, 599 | O: ArgType, 600 | >( 601 | lhs: ValueRef<'a, NullableType>, 602 | rhs: ValueRef<'b, NullableType>, 603 | generics: &GenericMap, 604 | func: impl Fn(I1::ScalarRef<'_>, I2::ScalarRef<'_>, &mut O::ColumnBuilder), 605 | ) -> Value> { 606 | match (lhs, rhs) { 607 | (ValueRef::Scalar(None), _) | (_, ValueRef::Scalar(None)) => Value::Scalar(None), 608 | (ValueRef::Scalar(Some(lhs)), ValueRef::Scalar(Some(rhs))) => { 609 | let mut builder = O::create_builder(1, generics); 610 | func(lhs, rhs, &mut builder); 611 | Value::Scalar(Some(O::build_scalar(builder))) 612 | } 613 | (ValueRef::Scalar(Some(lhs)), ValueRef::Column((rhs, rhs_validity))) => { 614 | let iter = I2::iter_column(&rhs).zip(&rhs_validity); 615 | let mut builder = O::create_builder(iter.size_hint().0, generics); 616 | for (rhs, rhs_validity) in iter { 617 | if rhs_validity { 618 | func(lhs.clone(), rhs, &mut builder); 619 | } else { 620 | O::push_default(&mut builder); 621 | } 622 | } 623 | Value::Column((O::build_column(builder), rhs_validity)) 624 | } 625 | (ValueRef::Column((lhs, lhs_validity)), ValueRef::Scalar(Some(rhs))) => { 626 | let iter = I1::iter_column(&lhs).zip(&lhs_validity); 627 | let mut builder = O::create_builder(iter.size_hint().0, generics); 628 | for (lhs, lhs_validity) in iter { 629 | if lhs_validity { 630 | 
func(lhs, rhs.clone(), &mut builder); 631 | } else { 632 | O::push_default(&mut builder); 633 | } 634 | } 635 | Value::Column((O::build_column(builder), lhs_validity)) 636 | } 637 | (ValueRef::Column((lhs, lhs_validity)), ValueRef::Column((rhs, rhs_validity))) => { 638 | let iter = I1::iter_column(&lhs) 639 | .zip(&lhs_validity) 640 | .zip(I2::iter_column(&rhs)) 641 | .zip(&rhs_validity); 642 | let mut builder = O::create_builder(iter.size_hint().0, generics); 643 | for (((lhs, lhs_validity), rhs), rhs_validity) in iter { 644 | if lhs_validity && rhs_validity { 645 | func(lhs, rhs, &mut builder); 646 | } else { 647 | O::push_default(&mut builder); 648 | } 649 | } 650 | Value::Column((O::build_column(builder), lhs_validity)) 651 | } 652 | } 653 | } 654 | -------------------------------------------------------------------------------- /src/values.rs: -------------------------------------------------------------------------------- 1 | use std::{iter::once, ops::Range}; 2 | 3 | use arrow2::{ 4 | bitmap::{Bitmap, MutableBitmap}, 5 | buffer::Buffer, 6 | trusted_len::TrustedLen, 7 | }; 8 | use enum_as_inner::EnumAsInner; 9 | 10 | use crate::{ 11 | types::*, 12 | util::{append_bitmap, bitmap_into_mut, buffer_into_mut, constant_bitmap}, 13 | }; 14 | 15 | #[derive(EnumAsInner)] 16 | pub enum Value { 17 | Scalar(T::Scalar), 18 | Column(T::Column), 19 | } 20 | 21 | #[derive(EnumAsInner)] 22 | pub enum ValueRef<'a, T: ValueType> { 23 | Scalar(T::ScalarRef<'a>), 24 | Column(T::Column), 25 | } 26 | 27 | #[derive(Debug, Clone, Default, EnumAsInner)] 28 | pub enum Scalar { 29 | #[default] 30 | Null, 31 | EmptyArray, 32 | Int8(i8), 33 | Int16(i16), 34 | UInt8(u8), 35 | UInt16(u16), 36 | Boolean(bool), 37 | String(Vec), 38 | Array(Column), 39 | Tuple(Vec), 40 | } 41 | 42 | #[derive(Debug, Clone, Default, EnumAsInner)] 43 | pub enum ScalarRef<'a> { 44 | #[default] 45 | Null, 46 | EmptyArray, 47 | Int8(i8), 48 | Int16(i16), 49 | UInt8(u8), 50 | UInt16(u16), 51 | Boolean(bool), 52 | 
String(&'a [u8]), 53 | Array(Column), 54 | Tuple(Vec>), 55 | } 56 | 57 | #[derive(Debug, Clone, EnumAsInner)] 58 | pub enum Column { 59 | Null { 60 | len: usize, 61 | }, 62 | EmptyArray { 63 | len: usize, 64 | }, 65 | Int8(Buffer), 66 | Int16(Buffer), 67 | UInt8(Buffer), 68 | UInt16(Buffer), 69 | Boolean(Bitmap), 70 | String { 71 | data: Buffer, 72 | offsets: Vec, 73 | }, 74 | Array { 75 | array: Box, 76 | offsets: Vec, 77 | }, 78 | Nullable { 79 | column: Box, 80 | validity: Bitmap, 81 | }, 82 | Tuple { 83 | fields: Vec, 84 | len: usize, 85 | }, 86 | } 87 | 88 | #[derive(Debug, Clone, EnumAsInner)] 89 | pub enum ColumnBuilder { 90 | Null { 91 | len: usize, 92 | }, 93 | EmptyArray { 94 | len: usize, 95 | }, 96 | Int8(Vec), 97 | Int16(Vec), 98 | UInt8(Vec), 99 | UInt16(Vec), 100 | Boolean(MutableBitmap), 101 | String { 102 | data: Vec, 103 | offsets: Vec, 104 | }, 105 | Array { 106 | array: Box, 107 | offsets: Vec, 108 | }, 109 | Nullable { 110 | column: Box, 111 | validity: MutableBitmap, 112 | }, 113 | Tuple { 114 | fields: Vec, 115 | len: usize, 116 | }, 117 | } 118 | 119 | impl<'a, T: ValueType> ValueRef<'a, T> { 120 | pub fn to_owned(self) -> Value { 121 | match self { 122 | ValueRef::Scalar(scalar) => Value::Scalar(T::to_owned_scalar(scalar)), 123 | ValueRef::Column(col) => Value::Column(col), 124 | } 125 | } 126 | } 127 | 128 | impl<'a, T: ValueType> Value { 129 | pub fn as_ref(&'a self) -> ValueRef<'a, T> { 130 | match self { 131 | Value::Scalar(scalar) => ValueRef::Scalar(T::to_scalar_ref(scalar)), 132 | Value::Column(col) => ValueRef::Column(col.clone()), 133 | } 134 | } 135 | } 136 | 137 | impl<'a, T: ValueType> Clone for ValueRef<'a, T> { 138 | fn clone(&self) -> Self { 139 | match self { 140 | ValueRef::Scalar(scalar) => ValueRef::Scalar(scalar.clone()), 141 | ValueRef::Column(col) => ValueRef::Column(col.clone()), 142 | } 143 | } 144 | } 145 | 146 | impl Scalar { 147 | pub fn as_ref(&self) -> ScalarRef { 148 | match self { 149 | Scalar::Null => 
ScalarRef::Null, 150 | Scalar::EmptyArray => ScalarRef::EmptyArray, 151 | Scalar::Int8(i) => ScalarRef::Int8(*i), 152 | Scalar::Int16(i) => ScalarRef::Int16(*i), 153 | Scalar::UInt8(i) => ScalarRef::UInt8(*i), 154 | Scalar::UInt16(i) => ScalarRef::UInt16(*i), 155 | Scalar::Boolean(b) => ScalarRef::Boolean(*b), 156 | Scalar::String(s) => ScalarRef::String(s.as_slice()), 157 | Scalar::Array(col) => ScalarRef::Array(col.clone()), 158 | Scalar::Tuple(fields) => ScalarRef::Tuple(fields.iter().map(Scalar::as_ref).collect()), 159 | } 160 | } 161 | } 162 | 163 | impl<'a> ScalarRef<'a> { 164 | pub fn to_owned(&self) -> Scalar { 165 | match self { 166 | ScalarRef::Null => Scalar::Null, 167 | ScalarRef::EmptyArray => Scalar::EmptyArray, 168 | ScalarRef::Int8(i) => Scalar::Int8(*i), 169 | ScalarRef::Int16(i) => Scalar::Int16(*i), 170 | ScalarRef::UInt8(i) => Scalar::UInt8(*i), 171 | ScalarRef::UInt16(i) => Scalar::UInt16(*i), 172 | ScalarRef::Boolean(b) => Scalar::Boolean(*b), 173 | ScalarRef::String(s) => Scalar::String(s.to_vec()), 174 | ScalarRef::Array(col) => Scalar::Array(col.clone()), 175 | ScalarRef::Tuple(fields) => { 176 | Scalar::Tuple(fields.iter().map(ScalarRef::to_owned).collect()) 177 | } 178 | } 179 | } 180 | 181 | pub fn repeat(&self, n: usize) -> ColumnBuilder { 182 | match self { 183 | ScalarRef::Null => ColumnBuilder::Null { len: n }, 184 | ScalarRef::EmptyArray => ColumnBuilder::EmptyArray { len: n }, 185 | ScalarRef::Int8(i) => ColumnBuilder::Int8(vec![*i; n]), 186 | ScalarRef::Int16(i) => ColumnBuilder::Int16(vec![*i; n]), 187 | ScalarRef::UInt8(i) => ColumnBuilder::UInt8(vec![*i; n]), 188 | ScalarRef::UInt16(i) => ColumnBuilder::UInt16(vec![*i; n]), 189 | ScalarRef::Boolean(b) => ColumnBuilder::Boolean(constant_bitmap(*b, n)), 190 | ScalarRef::String(s) => { 191 | let len = s.len(); 192 | let mut data = Vec::with_capacity(len * n); 193 | for _ in 0..n { 194 | data.extend_from_slice(s); 195 | } 196 | let offsets = once(0).chain((0..n).map(|i| len * (i + 
1))).collect(); 197 | ColumnBuilder::String { data, offsets } 198 | } 199 | ScalarRef::Array(col) => { 200 | let col = ColumnBuilder::from_column(col.clone()); 201 | let len = col.len(); 202 | let mut builder = col.clone(); 203 | for _ in 1..n { 204 | builder.append(&col); 205 | } 206 | let offsets = once(0).chain((0..n).map(|i| len * (i + 1))).collect(); 207 | ColumnBuilder::Array { 208 | array: Box::new(builder), 209 | offsets, 210 | } 211 | } 212 | ScalarRef::Tuple(fields) => ColumnBuilder::Tuple { 213 | fields: fields.iter().map(|field| field.repeat(n)).collect(), 214 | len: n, 215 | }, 216 | } 217 | } 218 | } 219 | 220 | impl Column { 221 | pub fn len(&self) -> usize { 222 | match self { 223 | Column::Null { len } => *len, 224 | Column::EmptyArray { len } => *len, 225 | Column::Int8(col) => col.len(), 226 | Column::Int16(col) => col.len(), 227 | Column::UInt8(col) => col.len(), 228 | Column::UInt16(col) => col.len(), 229 | Column::Boolean(col) => col.len(), 230 | Column::String { data: _, offsets } => offsets.len() - 1, 231 | Column::Array { array: _, offsets } => offsets.len() - 1, 232 | Column::Nullable { 233 | column: _, 234 | validity, 235 | } => validity.len(), 236 | Column::Tuple { len, .. } => *len, 237 | } 238 | } 239 | 240 | pub fn index(&self, index: usize) -> ScalarRef { 241 | match self { 242 | Column::Null { .. } => ScalarRef::Null, 243 | Column::EmptyArray { .. 
} => ScalarRef::EmptyArray, 244 | Column::Int8(col) => ScalarRef::Int8(col[index]), 245 | Column::Int16(col) => ScalarRef::Int16(col[index]), 246 | Column::UInt8(col) => ScalarRef::UInt8(col[index]), 247 | Column::UInt16(col) => ScalarRef::UInt16(col[index]), 248 | Column::Boolean(col) => ScalarRef::Boolean(col.get(index).unwrap()), 249 | Column::String { data, offsets } => { 250 | ScalarRef::String(&data[offsets[index]..offsets[index + 1]]) 251 | } 252 | Column::Array { array, offsets } => { 253 | ScalarRef::Array((*array).clone().slice(offsets[index]..offsets[index + 1])) 254 | } 255 | Column::Nullable { column, validity } => { 256 | if validity.get(index).unwrap() { 257 | column.index(index) 258 | } else { 259 | ScalarRef::Null 260 | } 261 | } 262 | Column::Tuple { fields, .. } => { 263 | ScalarRef::Tuple(fields.iter().map(|field| field.index(index)).collect()) 264 | } 265 | } 266 | } 267 | 268 | pub fn slice(&self, range: Range) -> Self { 269 | assert!( 270 | range.end <= self.len(), 271 | "range {:?} out of len {}", 272 | range, 273 | self.len() 274 | ); 275 | match self { 276 | Column::Null { .. } => Column::Null { 277 | len: range.end - range.start, 278 | }, 279 | Column::EmptyArray { .. 
} => Column::EmptyArray { 280 | len: range.end - range.start, 281 | }, 282 | Column::Int8(col) => { 283 | Column::Int8(col.clone().slice(range.start, range.end - range.start)) 284 | } 285 | Column::Int16(col) => { 286 | Column::Int16(col.clone().slice(range.start, range.end - range.start)) 287 | } 288 | Column::UInt8(col) => { 289 | Column::UInt8(col.clone().slice(range.start, range.end - range.start)) 290 | } 291 | Column::UInt16(col) => { 292 | Column::UInt16(col.clone().slice(range.start, range.end - range.start)) 293 | } 294 | Column::Boolean(col) => { 295 | Column::Boolean(col.clone().slice(range.start, range.end - range.start)) 296 | } 297 | Column::String { data, offsets } => { 298 | let offsets = offsets[range.start..(range.end + 1)].to_vec(); 299 | Column::String { 300 | data: data.clone(), 301 | offsets, 302 | } 303 | } 304 | Column::Array { array, offsets } => { 305 | let offsets = offsets[range.start..(range.end + 1)].to_vec(); 306 | Column::Array { 307 | array: array.clone(), 308 | offsets, 309 | } 310 | } 311 | Column::Nullable { column, validity } => { 312 | let validity = validity.clone().slice(range.start, range.end - range.start); 313 | Column::Nullable { 314 | column: Box::new(column.slice(range)), 315 | validity, 316 | } 317 | } 318 | Column::Tuple { fields, .. 
} => Column::Tuple { 319 | fields: fields 320 | .iter() 321 | .map(|field| field.slice(range.clone())) 322 | .collect(), 323 | len: range.end - range.start, 324 | }, 325 | } 326 | } 327 | 328 | pub fn iter(&self) -> ColumnIterator { 329 | ColumnIterator { 330 | column: self, 331 | index: 0, 332 | len: self.len(), 333 | } 334 | } 335 | } 336 | 337 | impl ColumnBuilder { 338 | pub fn from_column(col: Column) -> Self { 339 | match col { 340 | Column::Null { len } => ColumnBuilder::Null { len }, 341 | Column::EmptyArray { len } => ColumnBuilder::EmptyArray { len }, 342 | Column::Int8(col) => ColumnBuilder::Int8(buffer_into_mut(col)), 343 | Column::Int16(col) => ColumnBuilder::Int16(buffer_into_mut(col)), 344 | Column::UInt8(col) => ColumnBuilder::UInt8(buffer_into_mut(col)), 345 | Column::UInt16(col) => ColumnBuilder::UInt16(buffer_into_mut(col)), 346 | Column::Boolean(col) => ColumnBuilder::Boolean(bitmap_into_mut(col)), 347 | Column::String { data, offsets } => ColumnBuilder::String { 348 | data: buffer_into_mut(data), 349 | offsets, 350 | }, 351 | Column::Array { array, offsets } => ColumnBuilder::Array { 352 | array: Box::new(ColumnBuilder::from_column(*array)), 353 | offsets, 354 | }, 355 | Column::Nullable { column, validity } => ColumnBuilder::Nullable { 356 | column: Box::new(ColumnBuilder::from_column(*column)), 357 | validity: bitmap_into_mut(validity), 358 | }, 359 | Column::Tuple { fields, len } => ColumnBuilder::Tuple { 360 | fields: fields 361 | .iter() 362 | .map(|col| ColumnBuilder::from_column(col.clone())) 363 | .collect(), 364 | len, 365 | }, 366 | } 367 | } 368 | 369 | pub fn len(&self) -> usize { 370 | match self { 371 | ColumnBuilder::Null { len } => *len, 372 | ColumnBuilder::EmptyArray { len } => *len, 373 | ColumnBuilder::Int8(col) => col.len(), 374 | ColumnBuilder::Int16(col) => col.len(), 375 | ColumnBuilder::UInt8(col) => col.len(), 376 | ColumnBuilder::UInt16(col) => col.len(), 377 | ColumnBuilder::Boolean(col) => col.len(), 378 | 
ColumnBuilder::String { data: _, offsets } => offsets.len() - 1, 379 | ColumnBuilder::Array { array: _, offsets } => offsets.len() - 1, 380 | ColumnBuilder::Nullable { 381 | column: _, 382 | validity, 383 | } => validity.len(), 384 | ColumnBuilder::Tuple { len, .. } => *len, 385 | } 386 | } 387 | 388 | pub fn with_capacity(ty: &DataType, capacity: usize) -> ColumnBuilder { 389 | match ty { 390 | DataType::Null => ColumnBuilder::Null { len: 0 }, 391 | DataType::EmptyArray => ColumnBuilder::EmptyArray { len: 0 }, 392 | DataType::Boolean => ColumnBuilder::Boolean(MutableBitmap::with_capacity(capacity)), 393 | DataType::String => { 394 | let mut offsets = Vec::with_capacity(capacity + 1); 395 | offsets.push(0); 396 | ColumnBuilder::String { 397 | data: Vec::new(), 398 | offsets, 399 | } 400 | } 401 | DataType::UInt8 => ColumnBuilder::UInt8(Vec::with_capacity(capacity)), 402 | DataType::UInt16 => ColumnBuilder::UInt16(Vec::with_capacity(capacity)), 403 | DataType::Int8 => ColumnBuilder::Int8(Vec::with_capacity(capacity)), 404 | DataType::Int16 => ColumnBuilder::Int16(Vec::with_capacity(capacity)), 405 | DataType::Nullable(ty) => ColumnBuilder::Nullable { 406 | column: Box::new(Self::with_capacity(ty, capacity)), 407 | validity: MutableBitmap::with_capacity(capacity), 408 | }, 409 | DataType::Array(ty) => { 410 | let mut offsets = Vec::with_capacity(capacity + 1); 411 | offsets.push(0); 412 | ColumnBuilder::Array { 413 | array: Box::new(Self::with_capacity(ty, 0)), 414 | offsets, 415 | } 416 | } 417 | DataType::Tuple(fields) => ColumnBuilder::Tuple { 418 | fields: fields 419 | .iter() 420 | .map(|field| Self::with_capacity(field, capacity)) 421 | .collect(), 422 | len: 0, 423 | }, 424 | DataType::Generic(_) => unreachable!(), 425 | } 426 | } 427 | 428 | pub fn push(&mut self, item: ScalarRef) { 429 | match (self, item) { 430 | (ColumnBuilder::Null { len }, ScalarRef::Null) => *len += 1, 431 | (ColumnBuilder::EmptyArray { len }, ScalarRef::EmptyArray) => *len += 1, 432 | 
(ColumnBuilder::Int8(col), ScalarRef::Int8(value)) => col.push(value), 433 | (ColumnBuilder::Int16(col), ScalarRef::Int16(value)) => col.push(value), 434 | (ColumnBuilder::UInt8(col), ScalarRef::UInt8(value)) => col.push(value), 435 | (ColumnBuilder::UInt16(col), ScalarRef::UInt16(value)) => col.push(value), 436 | (ColumnBuilder::Boolean(col), ScalarRef::Boolean(value)) => col.push(value), 437 | (ColumnBuilder::String { data, offsets }, ScalarRef::String(value)) => { 438 | data.extend_from_slice(value); 439 | offsets.push(data.len()); 440 | } 441 | (ColumnBuilder::Array { array, offsets }, ScalarRef::Array(value)) => { 442 | array.append(&ColumnBuilder::from_column(value)); 443 | offsets.push(array.len()); 444 | } 445 | (ColumnBuilder::Nullable { column, validity }, ScalarRef::Null) => { 446 | column.push_default(); 447 | validity.push(false); 448 | } 449 | (ColumnBuilder::Nullable { column, validity }, scalar) => { 450 | column.push(scalar); 451 | validity.push(true); 452 | } 453 | (ColumnBuilder::Tuple { fields, len }, ScalarRef::Tuple(value)) => { 454 | assert_eq!(fields.len(), value.len()); 455 | for (field, scalar) in fields.iter_mut().zip(value.iter()) { 456 | field.push(scalar.clone()); 457 | } 458 | *len += 1; 459 | } 460 | (c, s) => unreachable!("{c:?} {s:?}"), 461 | } 462 | } 463 | 464 | pub fn push_default(&mut self) { 465 | match self { 466 | ColumnBuilder::Null { len } => *len += 1, 467 | ColumnBuilder::EmptyArray { len } => *len += 1, 468 | ColumnBuilder::Int8(col) => col.push(0), 469 | ColumnBuilder::Int16(col) => col.push(0), 470 | ColumnBuilder::UInt8(col) => col.push(0), 471 | ColumnBuilder::UInt16(col) => col.push(0), 472 | ColumnBuilder::Boolean(col) => col.push(false), 473 | ColumnBuilder::String { data, offsets } => { 474 | offsets.push(data.len()); 475 | } 476 | ColumnBuilder::Array { array, offsets } => { 477 | offsets.push(array.len()); 478 | } 479 | ColumnBuilder::Nullable { column, validity } => { 480 | column.push_default(); 481 | 
validity.push(true);
            }
            ColumnBuilder::Tuple { fields, len } => {
                for field in fields {
                    field.push_default();
                }
                *len += 1;
            }
        }
    }

    /// Appends every row held by `other` to the end of this builder.
    ///
    /// # Panics
    ///
    /// Panics if `self` and `other` are not the same `ColumnBuilder` variant,
    /// if two tuple builders have a different number of fields, or if the
    /// offsets of a string/array builder are empty.
    pub fn append(&mut self, other: &ColumnBuilder) {
        match (self, other) {
            (ColumnBuilder::Null { len }, ColumnBuilder::Null { len: other_len }) => {
                *len += other_len;
            }
            (ColumnBuilder::EmptyArray { len }, ColumnBuilder::EmptyArray { len: other_len }) => {
                *len += other_len;
            }
            (ColumnBuilder::Int8(builder), ColumnBuilder::Int8(other_builder)) => {
                builder.extend_from_slice(other_builder);
            }
            (ColumnBuilder::Int16(builder), ColumnBuilder::Int16(other_builder)) => {
                builder.extend_from_slice(other_builder);
            }
            (ColumnBuilder::UInt8(builder), ColumnBuilder::UInt8(other_builder)) => {
                builder.extend_from_slice(other_builder);
            }
            (ColumnBuilder::UInt16(builder), ColumnBuilder::UInt16(other_builder)) => {
                builder.extend_from_slice(other_builder);
            }
            (ColumnBuilder::Boolean(builder), ColumnBuilder::Boolean(other_builder)) => {
                append_bitmap(builder, other_builder);
            }
            (
                ColumnBuilder::String { data, offsets },
                ColumnBuilder::String {
                    data: other_data,
                    offsets: other_offsets,
                },
            ) => {
                data.extend_from_slice(other_data);
                // Rebase the incoming offsets onto the current end of `data`.
                // The first incoming offset is skipped; presumably it is the
                // leading 0 that every offsets vector starts with.
                let start = offsets.last().copied().unwrap();
                offsets.extend(other_offsets.iter().skip(1).map(|offset| start + offset));
            }
            (
                ColumnBuilder::Array { array, offsets },
                ColumnBuilder::Array {
                    array: other_array,
                    offsets: other_offsets,
                },
            ) => {
                array.append(other_array);
                // Same rebasing as for strings, but relative to the inner
                // element builder instead of a byte buffer.
                let start = offsets.last().copied().unwrap();
                offsets.extend(other_offsets.iter().skip(1).map(|offset| start + offset));
            }
            (
                ColumnBuilder::Nullable { column, validity },
                ColumnBuilder::Nullable {
                    column: other_column,
                    validity: other_validity,
                },
            ) => {
                column.append(other_column);
                append_bitmap(validity, other_validity);
            }
            (
                ColumnBuilder::Tuple { fields, len },
                ColumnBuilder::Tuple {
                    fields: other_fields,
                    len: other_len,
                },
            ) => {
                assert_eq!(fields.len(), other_fields.len());
                for (field, other_field) in fields.iter_mut().zip(other_fields.iter()) {
                    field.append(other_field);
                }
                *len += other_len;
            }
            // Every same-variant pair is handled above, so only mismatched
            // variants can reach this arm.
            _ => unreachable!("cannot append column builders of different variants"),
        }
    }

    /// Consumes the builder and converts it into the matching `Column`
    /// variant, recursively building nested array, nullable and tuple
    /// builders.
    pub fn build(self) -> Column {
        match self {
            ColumnBuilder::Null { len } => Column::Null { len },
            ColumnBuilder::EmptyArray { len } => Column::EmptyArray { len },
            ColumnBuilder::Int8(builder) => Column::Int8(builder.into()),
            ColumnBuilder::Int16(builder) => Column::Int16(builder.into()),
            ColumnBuilder::UInt8(builder) => Column::UInt8(builder.into()),
            ColumnBuilder::UInt16(builder) => Column::UInt16(builder.into()),
            ColumnBuilder::Boolean(builder) => Column::Boolean(builder.into()),
            ColumnBuilder::String { data, offsets } => Column::String {
                data: data.into(),
                offsets,
            },
            ColumnBuilder::Array { array, offsets } => Column::Array {
                array: Box::new(array.build()),
                offsets,
            },
            ColumnBuilder::Nullable { column, validity } => Column::Nullable {
                column: Box::new(column.build()),
                validity: validity.into(),
            },
            ColumnBuilder::Tuple { fields, len } => Column::Tuple {
                fields: fields.into_iter().map(|field| field.build()).collect(),
                len,
            },
        }
    }

    /// Consumes a builder that holds exactly one row and returns that row as
    /// a `Scalar`.
    ///
    /// A nullable builder whose single validity bit is unset yields
    /// `Scalar::Null`.
    ///
    /// # Panics
    ///
    /// Panics if the builder does not hold exactly one row.
    pub fn build_scalar(self) -> Scalar {
        match self {
            ColumnBuilder::Null { len } => {
                assert_eq!(len, 1);
                Scalar::Null
            }
            ColumnBuilder::EmptyArray { len } => {
                assert_eq!(len, 1);
                Scalar::EmptyArray
            }
            ColumnBuilder::Int8(builder) => {
                assert_eq!(builder.len(), 1);
                Scalar::Int8(builder[0])
            }
            ColumnBuilder::Int16(builder) => {
                assert_eq!(builder.len(), 1);
                Scalar::Int16(builder[0])
            }
            ColumnBuilder::UInt8(builder) => {
                assert_eq!(builder.len(), 1);
                Scalar::UInt8(builder[0])
            }
            ColumnBuilder::UInt16(builder) => {
                assert_eq!(builder.len(), 1);
                Scalar::UInt16(builder[0])
            }
            ColumnBuilder::Boolean(builder) => {
                assert_eq!(builder.len(), 1);
                Scalar::Boolean(builder.get(0))
            }
            ColumnBuilder::String { data, offsets } => {
                // Two offsets delimit exactly one row. `data` is the byte
                // buffer, so its length is the string's byte length and must
                // not be constrained to 1.
                assert_eq!(offsets.len(), 2);
                Scalar::String(data[offsets[0]..offsets[1]].to_vec())
            }
            ColumnBuilder::Array { array, offsets } => {
                // Two offsets delimit exactly one row. `array` holds the
                // elements of that row, so it may contain any number of them.
                assert_eq!(offsets.len(), 2);
                Scalar::Array(array.build().slice(offsets[0]..offsets[1]))
            }
            ColumnBuilder::Nullable { column, validity } => {
                assert_eq!(column.len(), 1);
                assert_eq!(validity.len(), 1);
                if validity.get(0) {
                    column.build_scalar()
                } else {
                    Scalar::Null
                }
            }
            ColumnBuilder::Tuple { fields, len } => {
                assert_eq!(len, 1);
                Scalar::Tuple(
                    fields
                        .into_iter()
                        .map(|field| field.build_scalar())
                        .collect(),
                )
            }
        }
    }
}

/// Iterator over the rows of a borrowed `Column`, yielding one `ScalarRef`
/// per index in `index..len`.
pub struct ColumnIterator<'a> {
    column: &'a Column,
    index: usize,
    len: usize,
}

impl<'a> Iterator for ColumnIterator<'a> {
    type Item = ScalarRef<'a>;

    /// Returns the row at the current index and advances, or `None` once
    /// `index` has reached `len`.
    fn next(&mut self) -> Option<Self::Item> {
        if self.index < self.len {
            let item = self.column.index(self.index);
            self.index += 1;
            Some(item)
        } else {
            None
        }
    }

    /// Reports the exact number of rows still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remain = self.len - self.index;
        (remain, Some(remain))
    }
}

// SAFETY: `size_hint` returns `(remain, Some(remain))` with
// `remain = len - index`, and `next` yields exactly one item per index in
// `index..len`, so the reported length is exact as long as the iterator is
// constructed with `index <= len`.
unsafe impl<'a> TrustedLen for ColumnIterator<'a> {}
--------------------------------------------------------------------------------