├── .github └── workflows │ ├── build.yml │ └── release.yml ├── .gitignore ├── .yardopts ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── ext └── polars │ ├── Cargo.toml │ ├── extconf.rb │ └── src │ ├── allocator.rs │ ├── batched_csv.rs │ ├── conversion │ ├── any_value.rs │ ├── chunked_array.rs │ └── mod.rs │ ├── dataframe │ ├── construction.rs │ ├── export.rs │ ├── general.rs │ ├── io.rs │ ├── mod.rs │ └── serde.rs │ ├── error.rs │ ├── exceptions.rs │ ├── expr │ ├── array.rs │ ├── binary.rs │ ├── categorical.rs │ ├── datetime.rs │ ├── general.rs │ ├── list.rs │ ├── meta.rs │ ├── mod.rs │ ├── name.rs │ ├── rolling.rs │ ├── string.rs │ └── struct.rs │ ├── file.rs │ ├── functions │ ├── aggregation.rs │ ├── business.rs │ ├── eager.rs │ ├── io.rs │ ├── lazy.rs │ ├── meta.rs │ ├── misc.rs │ ├── mod.rs │ ├── random.rs │ ├── range.rs │ ├── string_cache.rs │ └── whenthen.rs │ ├── interop │ ├── arrow │ │ ├── mod.rs │ │ └── to_ruby.rs │ ├── mod.rs │ └── numo │ │ ├── mod.rs │ │ ├── numo_rs.rs │ │ ├── to_numo_df.rs │ │ └── to_numo_series.rs │ ├── lazyframe │ ├── general.rs │ ├── mod.rs │ ├── serde.rs │ └── sink.rs │ ├── lazygroupby.rs │ ├── lib.rs │ ├── map │ ├── dataframe.rs │ ├── lazy.rs │ ├── mod.rs │ └── series.rs │ ├── object.rs │ ├── on_startup.rs │ ├── prelude.rs │ ├── rb_modules.rs │ ├── series │ ├── aggregation.rs │ ├── arithmetic.rs │ ├── comparison.rs │ ├── construction.rs │ ├── export.rs │ ├── general.rs │ ├── import.rs │ ├── mod.rs │ └── scatter.rs │ ├── sql.rs │ └── utils.rs ├── lib ├── polars-df.rb ├── polars.rb └── polars │ ├── array_expr.rb │ ├── array_name_space.rb │ ├── batched_csv_reader.rb │ ├── binary_expr.rb │ ├── binary_name_space.rb │ ├── cat_expr.rb │ ├── cat_name_space.rb │ ├── config.rb │ ├── convert.rb │ ├── data_frame.rb │ ├── data_type_group.rb │ ├── data_types.rb │ ├── date_time_expr.rb │ ├── date_time_name_space.rb │ ├── dynamic_group_by.rb │ ├── exceptions.rb │ ├── expr.rb │ ├── 
expr_dispatch.rb │ ├── functions │ ├── aggregation │ │ ├── horizontal.rb │ │ └── vertical.rb │ ├── as_datatype.rb │ ├── col.rb │ ├── eager.rb │ ├── lazy.rb │ ├── len.rb │ ├── lit.rb │ ├── random.rb │ ├── range │ │ ├── date_range.rb │ │ ├── datetime_range.rb │ │ ├── int_range.rb │ │ └── time_range.rb │ ├── repeat.rb │ └── whenthen.rb │ ├── group_by.rb │ ├── io │ ├── avro.rb │ ├── csv.rb │ ├── database.rb │ ├── delta.rb │ ├── ipc.rb │ ├── json.rb │ ├── ndjson.rb │ └── parquet.rb │ ├── lazy_frame.rb │ ├── lazy_group_by.rb │ ├── list_expr.rb │ ├── list_name_space.rb │ ├── meta_expr.rb │ ├── name_expr.rb │ ├── plot.rb │ ├── rolling_group_by.rb │ ├── schema.rb │ ├── selectors.rb │ ├── series.rb │ ├── slice.rb │ ├── sql_context.rb │ ├── string_cache.rb │ ├── string_expr.rb │ ├── string_name_space.rb │ ├── struct_expr.rb │ ├── struct_name_space.rb │ ├── testing.rb │ ├── utils.rb │ ├── utils │ ├── constants.rb │ ├── convert.rb │ ├── parse.rb │ ├── various.rb │ └── wrap.rb │ ├── version.rb │ └── whenthen.rb ├── polars-df.gemspec └── test ├── array_expr_test.rb ├── arrow_test.rb ├── avro_test.rb ├── cat_expr_test.rb ├── config_test.rb ├── csv_test.rb ├── data_frame_test.rb ├── data_types_test.rb ├── database_test.rb ├── date_time_expr_test.rb ├── delta_test.rb ├── docs_test.rb ├── expr_test.rb ├── guide_test.rb ├── ipc_test.rb ├── json_test.rb ├── lazy_frame_test.rb ├── list_expr_test.rb ├── list_name_space_test.rb ├── meta_expr_test.rb ├── numo_test.rb ├── parquet_test.rb ├── plot_test.rb ├── selectors_test.rb ├── series_test.rb ├── sql_test.rb ├── string_cache_test.rb ├── string_expr_test.rb ├── string_name_space_test.rb ├── struct_expr_test.rb ├── support ├── data.arrow ├── data.avro ├── data.csv ├── data.json ├── data.ndjson ├── data.parquet ├── data2.csv ├── delta │ ├── _delta_log │ │ └── 00000000000000000000.json │ └── part-00001-eeffaeba-16eb-4d8b-bb8f-654bfb8e823d-c000.snappy.parquet ├── iris.csv └── types.parquet ├── test_helper.rb ├── testing_test.rb └── 
types_test.rb /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: actions/cache@v4 9 | with: 10 | path: | 11 | ~/.cargo/registry 12 | ~/.cargo/git 13 | tmp 14 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 15 | - uses: ruby/setup-ruby@v1 16 | with: 17 | ruby-version: 3.4 18 | bundler-cache: true 19 | - run: bundle exec rake compile 20 | - run: bundle exec rake test 21 | - run: bundle exec yard --fail-on-warning 22 | - run: bundle exec rake test:docs 23 | 24 | - uses: ankane/setup-postgres@v1 25 | with: 26 | database: polars_ruby_test 27 | - run: ADAPTER=postgresql ruby test/database_test.rb 28 | 29 | - uses: ankane/setup-mysql@v1 30 | with: 31 | database: polars_ruby_test 32 | - run: ADAPTER=mysql ruby test/database_test.rb 33 | - run: ADAPTER=trilogy ruby test/database_test.rb 34 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: workflow_dispatch 3 | jobs: 4 | release: 5 | strategy: 6 | fail-fast: false 7 | matrix: 8 | include: 9 | - platform: x86_64-linux 10 | target: x86_64-unknown-linux-gnu 11 | - platform: x86_64-linux-musl 12 | target: x86_64-unknown-linux-musl 13 | - platform: aarch64-linux 14 | target: aarch64-unknown-linux-gnu 15 | - platform: aarch64-linux-musl 16 | target: aarch64-unknown-linux-musl 17 | - platform: x86_64-darwin 18 | target: x86_64-apple-darwin 19 | # Rust uses external command to strip symbols and debuginfo on Mac 20 | # Do not do for arm64 since it interferes with code signing 21 | # and codesign binary is not present to re-sign 22 | setup: sudo ln -s /opt/osxcross/target/bin/x86_64-apple-darwin-strip /usr/local/bin/strip 23 | - platform: 
arm64-darwin 24 | target: aarch64-apple-darwin 25 | - platform: x64-mingw-ucrt 26 | target: x86_64-pc-windows-gnu 27 | runs-on: ubuntu-latest 28 | name: ${{ matrix.platform }} 29 | steps: 30 | - uses: actions/checkout@v4 31 | - run: | 32 | cargo install --locked --git https://github.com/ankane/cargo-3pl 33 | git clone https://github.com/ankane/3pl-source.git 34 | cargo 3pl --target ${{ matrix.target }} --require-files --source 3pl-source > LICENSE-THIRD-PARTY.txt 35 | - uses: ruby/setup-ruby@v1 36 | with: 37 | ruby-version: 3.3 38 | - uses: oxidize-rb/actions/cross-gem@v1 39 | id: cross-gem 40 | with: 41 | platform: ${{ matrix.platform }} 42 | ruby-versions: "3.4,3.3,3.2" 43 | # pre-script: ${{ matrix.setup }} 44 | - uses: actions/upload-artifact@v4 45 | with: 46 | name: cross-gem-${{ matrix.platform }} 47 | path: ${{ steps.cross-gem.outputs.gem-path }} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | /Gemfile.lock 10 | /target/ 11 | *.bundle 12 | *.so 13 | *.dll 14 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --no-private 2 | --markup markdown 3 | --embed-mixins 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["ext/polars"] 3 | resolver = "2" 4 | 5 | [profile.release] 6 | strip = true 7 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "rake" 6 | gem 
"rake-compiler" 7 | gem "minitest" 8 | gem "activerecord" 9 | gem "numo-narray" 10 | gem "vega" 11 | gem "pg" 12 | gem "mysql2" 13 | gem "trilogy" 14 | gem "sqlite3" 15 | 16 | if ENV["TEST_DELTA"] 17 | gem "deltalake-rb", ">= 0.1.4" 18 | end 19 | 20 | # https://github.com/lsegal/yard/issues/1321 21 | gem "yard", require: false 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 Ritchie Vink 2 | Copyright (c) 2022-2025 Andrew Kane 3 | Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | require "rake/extensiontask" 4 | 5 | task default: :test 6 | Rake::TestTask.new do |t| 7 | t.libs << "test" 8 | t.test_files = FileList["test/**/*_test.rb"].exclude(/docs_test/) 9 | end 10 | 11 | Rake::TestTask.new("test:docs") do |t| 12 | t.libs << "test" 13 | t.pattern = "test/docs_test.rb" 14 | end 15 | 16 | platforms = [ 17 | "x86_64-linux", 18 | "x86_64-linux-musl", 19 | "aarch64-linux", 20 | "aarch64-linux-musl", 21 | "x86_64-darwin", 22 | "arm64-darwin", 23 | "x64-mingw-ucrt" 24 | ] 25 | 26 | gemspec = Bundler.load_gemspec("polars-df.gemspec") 27 | Rake::ExtensionTask.new("polars", gemspec) do |ext| 28 | ext.lib_dir = "lib/polars" 29 | ext.cross_compile = true 30 | ext.cross_platform = platforms 31 | ext.cross_compiling do |spec| 32 | spec.dependencies.reject! { |dep| dep.name == "rb_sys" } 33 | spec.files.reject! 
{ |file| File.fnmatch?("ext/*", file, File::FNM_EXTGLOB) } 34 | end 35 | end 36 | 37 | task :remove_ext do 38 | path = "lib/polars/polars.bundle" 39 | File.unlink(path) if File.exist?(path) 40 | end 41 | 42 | Rake::Task["build"].enhance [:remove_ext] 43 | -------------------------------------------------------------------------------- /ext/polars/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "polars" 3 | version = "0.19.0" 4 | license = "MIT" 5 | authors = ["Andrew Kane "] 6 | edition = "2021" 7 | rust-version = "1.85.0" 8 | publish = false 9 | 10 | [lib] 11 | crate-type = ["cdylib"] 12 | 13 | [dependencies] 14 | ahash = "0.8" 15 | arrow = { package = "polars-arrow", version = "=0.48.1" } 16 | bytes = "1" 17 | chrono = "0.4" 18 | either = "1.8" 19 | magnus = "0.7" 20 | num-traits = "0.2" 21 | polars-core = "=0.48.1" 22 | polars-plan = "=0.48.1" 23 | polars-parquet = "=0.48.1" 24 | polars-utils = "=0.48.1" 25 | rayon = "1.9" 26 | regex = "1" 27 | serde_json = "1" 28 | 29 | [dependencies.polars] 30 | version = "=0.48.1" 31 | features = [ 32 | "abs", 33 | "approx_unique", 34 | "arg_where", 35 | "array_any_all", 36 | "array_count", 37 | "asof_join", 38 | "avro", 39 | "aws", 40 | "azure", 41 | "binary_encoding", 42 | "business", 43 | "cloud", 44 | "concat_str", 45 | "cov", 46 | "cross_join", 47 | "cse", 48 | "csv", 49 | "cum_agg", 50 | "cumulative_eval", 51 | "cutqcut", 52 | "dataframe_arithmetic", 53 | "diagonal_concat", 54 | "diff", 55 | "dot_product", 56 | "dtype-full", 57 | "dynamic_group_by", 58 | "ewma", 59 | "ewma_by", 60 | "extract_groups", 61 | "extract_jsonpath", 62 | "find_many", 63 | "fmt", 64 | "gcp", 65 | "http", 66 | "interpolate", 67 | "interpolate_by", 68 | "ipc", 69 | "ipc_streaming", 70 | "is_between", 71 | "is_first_distinct", 72 | "is_in", 73 | "is_last_distinct", 74 | "is_unique", 75 | "json", 76 | "lazy", 77 | "list_any_all", 78 | "list_count", 79 | "list_drop_nulls", 80 | 
"list_eval", 81 | "list_gather", 82 | "list_sample", 83 | "list_to_struct", 84 | "log", 85 | "merge_sorted", 86 | "meta", 87 | "mode", 88 | "moment", 89 | "month_start", 90 | "month_end", 91 | "offset_by", 92 | "object", 93 | "parquet", 94 | "partition_by", 95 | "pct_change", 96 | "peaks", 97 | "performant", 98 | "pivot", 99 | "product", 100 | "propagate_nans", 101 | "random", 102 | "rank", 103 | "range", 104 | "regex", 105 | "reinterpret", 106 | "repeat_by", 107 | "replace", 108 | "rle", 109 | "rolling_window", 110 | "rolling_window_by", 111 | "round_series", 112 | "row_hash", 113 | "search_sorted", 114 | "semi_anti_join", 115 | "serde-lazy", 116 | "sign", 117 | "sql", 118 | "string_encoding", 119 | "string_pad", 120 | "string_to_integer", 121 | "strings", 122 | "timezones", 123 | "to_dummies", 124 | "top_k", 125 | "trigonometry", 126 | "unique_counts", 127 | ] 128 | 129 | [target.'cfg(target_os = "linux")'.dependencies] 130 | jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] } 131 | 132 | [target.'cfg(not(any(target_os = "linux", target_os = "windows")))'.dependencies] 133 | mimalloc = { version = "0.1", default-features = false } 134 | -------------------------------------------------------------------------------- /ext/polars/extconf.rb: -------------------------------------------------------------------------------- 1 | require "mkmf" 2 | require "rb_sys/mkmf" 3 | 4 | create_rust_makefile("polars/polars") 5 | -------------------------------------------------------------------------------- /ext/polars/src/allocator.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_os = "linux")] 2 | use jemallocator::Jemalloc; 3 | 4 | #[cfg(not(any(target_os = "linux", target_os = "windows")))] 5 | use mimalloc::MiMalloc; 6 | 7 | #[global_allocator] 8 | #[cfg(target_os = "linux")] 9 | static ALLOC: Jemalloc = Jemalloc; 10 | 11 | #[global_allocator] 12 | #[cfg(not(any(target_os = "linux", target_os = 
"windows")))] 13 | static ALLOC: MiMalloc = MiMalloc; 14 | -------------------------------------------------------------------------------- /ext/polars/src/dataframe/export.rs: -------------------------------------------------------------------------------- 1 | use magnus::{prelude::*, IntoValue, RArray, Value}; 2 | 3 | use super::*; 4 | use crate::conversion::{ObjectValue, Wrap}; 5 | use crate::interop::arrow::to_ruby::dataframe_to_stream; 6 | use crate::RbResult; 7 | 8 | impl RbDataFrame { 9 | pub fn row_tuple(&self, idx: i64) -> Value { 10 | let idx = if idx < 0 { 11 | (self.df.borrow().height() as i64 + idx) as usize 12 | } else { 13 | idx as usize 14 | }; 15 | RArray::from_iter( 16 | self.df 17 | .borrow() 18 | .get_columns() 19 | .iter() 20 | .map(|s| match s.dtype() { 21 | DataType::Object(_) => { 22 | let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into()); 23 | obj.unwrap().to_value() 24 | } 25 | _ => Wrap(s.get(idx).unwrap()).into_value(), 26 | }), 27 | ) 28 | .as_value() 29 | } 30 | 31 | pub fn row_tuples(&self) -> Value { 32 | let df = &self.df; 33 | RArray::from_iter((0..df.borrow().height()).map(|idx| { 34 | RArray::from_iter( 35 | self.df 36 | .borrow() 37 | .get_columns() 38 | .iter() 39 | .map(|s| match s.dtype() { 40 | DataType::Object(_) => { 41 | let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into()); 42 | obj.unwrap().to_value() 43 | } 44 | _ => Wrap(s.get(idx).unwrap()).into_value(), 45 | }), 46 | ) 47 | })) 48 | .as_value() 49 | } 50 | 51 | pub fn __arrow_c_stream__(&self) -> RbResult { 52 | self.df.borrow_mut().align_chunks(); 53 | dataframe_to_stream(&self.df.borrow()) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /ext/polars/src/dataframe/mod.rs: -------------------------------------------------------------------------------- 1 | mod construction; 2 | mod export; 3 | mod general; 4 | mod io; 5 | mod serde; 6 | 7 | use polars::prelude::*; 8 | use 
std::cell::RefCell; 9 | 10 | #[magnus::wrap(class = "Polars::RbDataFrame")] 11 | pub struct RbDataFrame { 12 | pub df: RefCell, 13 | } 14 | 15 | impl From for RbDataFrame { 16 | fn from(df: DataFrame) -> Self { 17 | RbDataFrame::new(df) 18 | } 19 | } 20 | 21 | impl RbDataFrame { 22 | pub fn new(df: DataFrame) -> Self { 23 | RbDataFrame { 24 | df: RefCell::new(df), 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /ext/polars/src/dataframe/serde.rs: -------------------------------------------------------------------------------- 1 | use crate::exceptions::ComputeError; 2 | use crate::file::get_file_like; 3 | use crate::{RbDataFrame, RbResult}; 4 | use magnus::Value; 5 | use std::io::BufWriter; 6 | 7 | impl RbDataFrame { 8 | // TODO add to Ruby 9 | pub fn serialize_json(&self, rb_f: Value) -> RbResult<()> { 10 | let file = get_file_like(rb_f, true)?; 11 | let writer = BufWriter::new(file); 12 | serde_json::to_writer(writer, &self.df) 13 | .map_err(|err| ComputeError::new_err(err.to_string())) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /ext/polars/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Formatter}; 2 | 3 | use magnus::Error; 4 | use polars::prelude::PolarsError; 5 | 6 | use crate::exceptions::{ComputeError, InvalidOperationError}; 7 | use crate::rb_modules; 8 | 9 | pub enum RbPolarsErr { 10 | Polars(PolarsError), 11 | Other(String), 12 | } 13 | 14 | impl From for RbPolarsErr { 15 | fn from(err: PolarsError) -> Self { 16 | RbPolarsErr::Polars(err) 17 | } 18 | } 19 | 20 | impl From for RbPolarsErr { 21 | fn from(value: std::io::Error) -> Self { 22 | RbPolarsErr::Other(format!("{value:?}")) 23 | } 24 | } 25 | 26 | impl From for Error { 27 | fn from(err: RbPolarsErr) -> Self { 28 | match err { 29 | RbPolarsErr::Polars(err) => match err { 30 | PolarsError::ComputeError(err) => 
ComputeError::new_err(err.to_string()), 31 | PolarsError::InvalidOperation(err) => { 32 | InvalidOperationError::new_err(err.to_string()) 33 | } 34 | _ => Error::new(rb_modules::error(), err.to_string()), 35 | }, 36 | RbPolarsErr::Other(err) => Error::new(rb_modules::error(), err.to_string()), 37 | } 38 | } 39 | } 40 | 41 | impl Debug for RbPolarsErr { 42 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 43 | use RbPolarsErr::*; 44 | match self { 45 | Polars(err) => write!(f, "{err:?}"), 46 | Other(err) => write!(f, "BindingsError: {err:?}"), 47 | } 48 | } 49 | } 50 | 51 | #[macro_export] 52 | macro_rules! raise_err( 53 | ($msg:expr, $err:ident) => {{ 54 | Err(PolarsError::$err($msg.into())).map_err(RbPolarsErr::from)?; 55 | unreachable!() 56 | }} 57 | ); 58 | -------------------------------------------------------------------------------- /ext/polars/src/exceptions.rs: -------------------------------------------------------------------------------- 1 | use crate::rb_modules; 2 | use magnus::{exception, Error}; 3 | use std::borrow::Cow; 4 | 5 | macro_rules! 
create_exception { 6 | ($type:ident, $cls:expr) => { 7 | pub struct $type {} 8 | 9 | impl $type { 10 | pub fn new_err(message: T) -> Error 11 | where 12 | T: Into>, 13 | { 14 | Error::new($cls, message) 15 | } 16 | } 17 | }; 18 | } 19 | 20 | create_exception!(RbTypeError, exception::type_error()); 21 | create_exception!(RbValueError, exception::arg_error()); 22 | create_exception!(RbOverflowError, exception::range_error()); 23 | create_exception!(ComputeError, rb_modules::compute_error()); 24 | create_exception!(InvalidOperationError, rb_modules::invalid_operation_error()); 25 | -------------------------------------------------------------------------------- /ext/polars/src/expr/array.rs: -------------------------------------------------------------------------------- 1 | use polars::prelude::*; 2 | 3 | use crate::RbExpr; 4 | 5 | impl RbExpr { 6 | pub fn array_max(&self) -> Self { 7 | self.inner.clone().arr().max().into() 8 | } 9 | 10 | pub fn array_min(&self) -> Self { 11 | self.inner.clone().arr().min().into() 12 | } 13 | 14 | pub fn array_sum(&self) -> Self { 15 | self.inner.clone().arr().sum().into() 16 | } 17 | 18 | pub fn arr_unique(&self, maintain_order: bool) -> Self { 19 | if maintain_order { 20 | self.inner.clone().arr().unique_stable().into() 21 | } else { 22 | self.inner.clone().arr().unique().into() 23 | } 24 | } 25 | 26 | pub fn arr_to_list(&self) -> Self { 27 | self.inner.clone().arr().to_list().into() 28 | } 29 | 30 | pub fn arr_all(&self) -> Self { 31 | self.inner.clone().arr().all().into() 32 | } 33 | 34 | pub fn arr_any(&self) -> Self { 35 | self.inner.clone().arr().any().into() 36 | } 37 | 38 | pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self { 39 | self.inner 40 | .clone() 41 | .arr() 42 | .sort(SortOptions { 43 | descending, 44 | nulls_last, 45 | ..Default::default() 46 | }) 47 | .into() 48 | } 49 | 50 | pub fn arr_reverse(&self) -> Self { 51 | self.inner.clone().arr().reverse().into() 52 | } 53 | 54 | pub fn 
arr_arg_min(&self) -> Self { 55 | self.inner.clone().arr().arg_min().into() 56 | } 57 | 58 | pub fn arr_arg_max(&self) -> Self { 59 | self.inner.clone().arr().arg_max().into() 60 | } 61 | 62 | pub fn arr_get(&self, index: &RbExpr, null_on_oob: bool) -> Self { 63 | self.inner 64 | .clone() 65 | .arr() 66 | .get(index.inner.clone(), null_on_oob) 67 | .into() 68 | } 69 | 70 | pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self { 71 | self.inner 72 | .clone() 73 | .arr() 74 | .join(separator.inner.clone(), ignore_nulls) 75 | .into() 76 | } 77 | 78 | pub fn arr_contains(&self, other: &RbExpr, nulls_equal: bool) -> Self { 79 | self.inner 80 | .clone() 81 | .arr() 82 | .contains(other.inner.clone(), nulls_equal) 83 | .into() 84 | } 85 | 86 | pub fn arr_count_matches(&self, expr: &RbExpr) -> Self { 87 | self.inner 88 | .clone() 89 | .arr() 90 | .count_matches(expr.inner.clone()) 91 | .into() 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /ext/polars/src/expr/binary.rs: -------------------------------------------------------------------------------- 1 | use crate::RbExpr; 2 | 3 | impl RbExpr { 4 | pub fn bin_contains(&self, lit: &RbExpr) -> Self { 5 | self.inner 6 | .clone() 7 | .binary() 8 | .contains_literal(lit.inner.clone()) 9 | .into() 10 | } 11 | 12 | pub fn bin_ends_with(&self, sub: &RbExpr) -> Self { 13 | self.inner 14 | .clone() 15 | .binary() 16 | .ends_with(sub.inner.clone()) 17 | .into() 18 | } 19 | 20 | pub fn bin_starts_with(&self, sub: &RbExpr) -> Self { 21 | self.inner 22 | .clone() 23 | .binary() 24 | .starts_with(sub.inner.clone()) 25 | .into() 26 | } 27 | 28 | pub fn bin_hex_decode(&self, strict: bool) -> Self { 29 | self.inner.clone().binary().hex_decode(strict).into() 30 | } 31 | 32 | pub fn bin_base64_decode(&self, strict: bool) -> Self { 33 | self.inner.clone().binary().base64_decode(strict).into() 34 | } 35 | 36 | pub fn bin_hex_encode(&self) -> Self { 37 | 
self.inner.clone().binary().hex_encode().into() 38 | } 39 | 40 | pub fn bin_base64_encode(&self) -> Self { 41 | self.inner.clone().binary().base64_encode().into() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /ext/polars/src/expr/categorical.rs: -------------------------------------------------------------------------------- 1 | use crate::RbExpr; 2 | 3 | impl RbExpr { 4 | pub fn cat_get_categories(&self) -> Self { 5 | self.inner.clone().cat().get_categories().into() 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /ext/polars/src/expr/meta.rs: -------------------------------------------------------------------------------- 1 | use magnus::RArray; 2 | 3 | use crate::{RbExpr, RbPolarsErr, RbResult}; 4 | 5 | impl RbExpr { 6 | pub fn meta_eq(&self, other: &RbExpr) -> bool { 7 | self.inner == other.inner 8 | } 9 | 10 | pub fn meta_pop(&self) -> RbResult { 11 | let exprs = self.inner.clone().meta().pop().map_err(RbPolarsErr::from)?; 12 | Ok(RArray::from_iter( 13 | exprs.iter().map(|e| RbExpr::from(e.clone())), 14 | )) 15 | } 16 | 17 | pub fn meta_root_names(&self) -> Vec { 18 | self.inner 19 | .clone() 20 | .meta() 21 | .root_names() 22 | .iter() 23 | .map(|name| name.to_string()) 24 | .collect() 25 | } 26 | 27 | pub fn meta_output_name(&self) -> RbResult { 28 | let name = self 29 | .inner 30 | .clone() 31 | .meta() 32 | .output_name() 33 | .map_err(RbPolarsErr::from)?; 34 | Ok(name.to_string()) 35 | } 36 | 37 | pub fn meta_undo_aliases(&self) -> RbExpr { 38 | self.inner.clone().meta().undo_aliases().into() 39 | } 40 | 41 | pub fn meta_has_multiple_outputs(&self) -> bool { 42 | self.inner.clone().meta().has_multiple_outputs() 43 | } 44 | 45 | pub fn meta_is_column(&self) -> bool { 46 | self.inner.clone().meta().is_column() 47 | } 48 | 49 | pub fn meta_is_regex_projection(&self) -> bool { 50 | self.inner.clone().meta().is_regex_projection() 51 | } 52 | 53 | pub fn 
_meta_selector_add(&self, other: &RbExpr) -> RbResult { 54 | let out = self 55 | .inner 56 | .clone() 57 | .meta() 58 | ._selector_add(other.inner.clone()) 59 | .map_err(RbPolarsErr::from)?; 60 | Ok(out.into()) 61 | } 62 | 63 | pub fn _meta_selector_sub(&self, other: &RbExpr) -> RbResult { 64 | let out = self 65 | .inner 66 | .clone() 67 | .meta() 68 | ._selector_sub(other.inner.clone()) 69 | .map_err(RbPolarsErr::from)?; 70 | Ok(out.into()) 71 | } 72 | 73 | pub fn _meta_selector_and(&self, other: &RbExpr) -> RbResult { 74 | let out = self 75 | .inner 76 | .clone() 77 | .meta() 78 | ._selector_and(other.inner.clone()) 79 | .map_err(RbPolarsErr::from)?; 80 | Ok(out.into()) 81 | } 82 | 83 | pub fn _meta_as_selector(&self) -> RbExpr { 84 | self.inner.clone().meta()._into_selector().into() 85 | } 86 | 87 | fn compute_tree_format(&self, display_as_dot: bool) -> RbResult { 88 | let e = self 89 | .inner 90 | .clone() 91 | .meta() 92 | .into_tree_formatter(display_as_dot) 93 | .map_err(RbPolarsErr::from)?; 94 | Ok(format!("{e}")) 95 | } 96 | 97 | pub fn meta_tree_format(&self) -> RbResult { 98 | self.compute_tree_format(false) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /ext/polars/src/expr/mod.rs: -------------------------------------------------------------------------------- 1 | mod array; 2 | mod binary; 3 | mod categorical; 4 | mod datetime; 5 | mod general; 6 | mod list; 7 | mod meta; 8 | mod name; 9 | mod rolling; 10 | mod string; 11 | mod r#struct; 12 | 13 | use magnus::{prelude::*, RArray}; 14 | use polars::lazy::dsl::Expr; 15 | 16 | use crate::RbResult; 17 | 18 | #[magnus::wrap(class = "Polars::RbExpr")] 19 | #[derive(Clone)] 20 | pub struct RbExpr { 21 | pub inner: Expr, 22 | } 23 | 24 | impl From for RbExpr { 25 | fn from(inner: Expr) -> Self { 26 | RbExpr { inner } 27 | } 28 | } 29 | 30 | pub fn rb_exprs_to_exprs(rb_exprs: RArray) -> RbResult> { 31 | let mut exprs = Vec::new(); 32 | for item in 
rb_exprs.into_iter() { 33 | exprs.push(<&RbExpr>::try_convert(item)?.inner.clone()); 34 | } 35 | Ok(exprs) 36 | } 37 | -------------------------------------------------------------------------------- /ext/polars/src/expr/name.rs: -------------------------------------------------------------------------------- 1 | use magnus::{block::Proc, value::Opaque, Ruby}; 2 | use polars::prelude::*; 3 | use polars_utils::format_pl_smallstr; 4 | 5 | use crate::RbExpr; 6 | 7 | impl RbExpr { 8 | pub fn name_keep(&self) -> Self { 9 | self.inner.clone().name().keep().into() 10 | } 11 | 12 | pub fn name_map(&self, lambda: Proc) -> Self { 13 | let lambda = Opaque::from(lambda); 14 | self.inner 15 | .clone() 16 | .name() 17 | .map(move |name| { 18 | let lambda = Ruby::get().unwrap().get_inner(lambda); 19 | let out = lambda.call::<_, String>((name.as_str(),)); 20 | match out { 21 | Ok(out) => Ok(format_pl_smallstr!("{}", out)), 22 | Err(e) => Err(PolarsError::ComputeError( 23 | format!("Ruby function in 'name.map' produced an error: {}.", e).into(), 24 | )), 25 | } 26 | }) 27 | .into() 28 | } 29 | 30 | pub fn name_prefix(&self, prefix: String) -> Self { 31 | self.inner.clone().name().prefix(&prefix).into() 32 | } 33 | 34 | pub fn name_suffix(&self, suffix: String) -> Self { 35 | self.inner.clone().name().suffix(&suffix).into() 36 | } 37 | 38 | pub fn name_to_lowercase(&self) -> Self { 39 | self.inner.clone().name().to_lowercase().into() 40 | } 41 | 42 | pub fn name_to_uppercase(&self) -> Self { 43 | self.inner.clone().name().to_uppercase().into() 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /ext/polars/src/expr/struct.rs: -------------------------------------------------------------------------------- 1 | use crate::RbExpr; 2 | 3 | impl RbExpr { 4 | pub fn struct_field_by_index(&self, index: i64) -> Self { 5 | self.inner.clone().struct_().field_by_index(index).into() 6 | } 7 | 8 | pub fn struct_field_by_name(&self, name: String) -> 
Self { 9 | self.inner.clone().struct_().field_by_name(&name).into() 10 | } 11 | 12 | pub fn struct_rename_fields(&self, names: Vec) -> Self { 13 | self.inner.clone().struct_().rename_fields(names).into() 14 | } 15 | 16 | pub fn struct_json_encode(&self) -> Self { 17 | self.inner.clone().struct_().json_encode().into() 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ext/polars/src/functions/aggregation.rs: -------------------------------------------------------------------------------- 1 | use magnus::RArray; 2 | use polars::lazy::dsl; 3 | 4 | use crate::rb_exprs_to_exprs; 5 | use crate::{RbExpr, RbPolarsErr, RbResult}; 6 | 7 | pub fn all_horizontal(exprs: RArray) -> RbResult { 8 | let exprs = rb_exprs_to_exprs(exprs)?; 9 | let e = dsl::all_horizontal(exprs).map_err(RbPolarsErr::from)?; 10 | Ok(e.into()) 11 | } 12 | 13 | pub fn any_horizontal(exprs: RArray) -> RbResult { 14 | let exprs = rb_exprs_to_exprs(exprs)?; 15 | let e = dsl::any_horizontal(exprs).map_err(RbPolarsErr::from)?; 16 | Ok(e.into()) 17 | } 18 | 19 | pub fn max_horizontal(exprs: RArray) -> RbResult { 20 | let exprs = rb_exprs_to_exprs(exprs)?; 21 | let e = dsl::max_horizontal(exprs).map_err(RbPolarsErr::from)?; 22 | Ok(e.into()) 23 | } 24 | 25 | pub fn min_horizontal(exprs: RArray) -> RbResult { 26 | let exprs = rb_exprs_to_exprs(exprs)?; 27 | let e = dsl::min_horizontal(exprs).map_err(RbPolarsErr::from)?; 28 | Ok(e.into()) 29 | } 30 | 31 | pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult { 32 | let exprs = rb_exprs_to_exprs(exprs)?; 33 | let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?; 34 | Ok(e.into()) 35 | } 36 | 37 | pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult { 38 | let exprs = rb_exprs_to_exprs(exprs)?; 39 | let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?; 40 | Ok(e.into()) 41 | } 42 | 
-------------------------------------------------------------------------------- /ext/polars/src/functions/business.rs: -------------------------------------------------------------------------------- 1 | use polars::lazy::dsl; 2 | 3 | use crate::RbExpr; 4 | 5 | // TODO add to Ruby 6 | pub fn business_day_count( 7 | start: &RbExpr, 8 | end: &RbExpr, 9 | week_mask: [bool; 7], 10 | holidays: Vec, 11 | ) -> RbExpr { 12 | let start = start.inner.clone(); 13 | let end = end.inner.clone(); 14 | dsl::business_day_count(start, end, week_mask, holidays).into() 15 | } 16 | -------------------------------------------------------------------------------- /ext/polars/src/functions/eager.rs: -------------------------------------------------------------------------------- 1 | use magnus::RArray; 2 | use polars::functions; 3 | use polars_core::prelude::DataFrame; 4 | 5 | use crate::conversion::{get_df, get_series}; 6 | use crate::error::RbPolarsErr; 7 | use crate::{RbDataFrame, RbResult, RbSeries}; 8 | 9 | pub fn concat_df(seq: RArray) -> RbResult { 10 | use polars_core::error::PolarsResult; 11 | 12 | let mut iter = seq.into_iter(); 13 | let first = iter.next().unwrap(); 14 | 15 | let first_rdf = get_df(first)?; 16 | let identity_df = first_rdf.slice(0, 0); 17 | 18 | let mut rdfs: Vec> = vec![Ok(first_rdf)]; 19 | 20 | for item in iter { 21 | let rdf = get_df(item)?; 22 | rdfs.push(Ok(rdf)); 23 | } 24 | 25 | let identity = Ok(identity_df); 26 | 27 | let df = rdfs 28 | .into_iter() 29 | .fold(identity, |acc: PolarsResult, df| { 30 | let mut acc = acc?; 31 | acc.vstack_mut(&df?)?; 32 | Ok(acc) 33 | }) 34 | .map_err(RbPolarsErr::from)?; 35 | 36 | Ok(df.into()) 37 | } 38 | 39 | pub fn concat_series(seq: RArray) -> RbResult { 40 | let mut iter = seq.into_iter(); 41 | let first = iter.next().unwrap(); 42 | 43 | let mut s = get_series(first)?; 44 | 45 | for res in iter { 46 | let item = res; 47 | let item = get_series(item)?; 48 | s.append(&item).map_err(RbPolarsErr::from)?; 49 | } 50 | 
Ok(s.into()) 51 | } 52 | 53 | pub fn concat_df_diagonal(seq: RArray) -> RbResult { 54 | let mut dfs = Vec::new(); 55 | for item in seq.into_iter() { 56 | dfs.push(get_df(item)?); 57 | } 58 | let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?; 59 | Ok(df.into()) 60 | } 61 | 62 | pub fn concat_df_horizontal(seq: RArray) -> RbResult { 63 | let mut dfs = Vec::new(); 64 | for item in seq.into_iter() { 65 | dfs.push(get_df(item)?); 66 | } 67 | let df = functions::concat_df_horizontal(&dfs, true).map_err(RbPolarsErr::from)?; 68 | Ok(df.into()) 69 | } 70 | -------------------------------------------------------------------------------- /ext/polars/src/functions/io.rs: -------------------------------------------------------------------------------- 1 | use std::io::BufReader; 2 | 3 | use arrow::array::Utf8ViewArray; 4 | use magnus::{RHash, Value}; 5 | use polars::prelude::ArrowSchema; 6 | use polars_core::datatypes::create_enum_dtype; 7 | 8 | use crate::conversion::Wrap; 9 | use crate::file::{get_either_file, EitherRustRubyFile}; 10 | use crate::prelude::ArrowDataType; 11 | use crate::{RbPolarsErr, RbResult}; 12 | 13 | pub fn read_ipc_schema(rb_f: Value) -> RbResult { 14 | use arrow::io::ipc::read::read_file_metadata; 15 | let metadata = match get_either_file(rb_f, false)? { 16 | EitherRustRubyFile::Rust(r) => { 17 | read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)? 18 | } 19 | EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?, 20 | }; 21 | 22 | let dict = RHash::new(); 23 | fields_to_rbdict(&metadata.schema, &dict)?; 24 | Ok(dict) 25 | } 26 | 27 | pub fn read_parquet_schema(rb_f: Value) -> RbResult { 28 | use polars_parquet::read::{infer_schema, read_metadata}; 29 | 30 | let metadata = match get_either_file(rb_f, false)? { 31 | EitherRustRubyFile::Rust(r) => { 32 | read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)? 
33 | } 34 | EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?, 35 | }; 36 | let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?; 37 | 38 | let dict = RHash::new(); 39 | fields_to_rbdict(&arrow_schema, &dict)?; 40 | Ok(dict) 41 | } 42 | 43 | fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> { 44 | for field in schema.iter_values() { 45 | let dt = if field.is_enum() { 46 | Wrap(create_enum_dtype(Utf8ViewArray::new_empty( 47 | ArrowDataType::Utf8View, 48 | ))) 49 | } else { 50 | Wrap(polars::prelude::DataType::from_arrow_field(field)) 51 | }; 52 | dict.aset(field.name.as_str(), dt)?; 53 | } 54 | Ok(()) 55 | } 56 | -------------------------------------------------------------------------------- /ext/polars/src/functions/meta.rs: -------------------------------------------------------------------------------- 1 | use magnus::{IntoValue, Value}; 2 | use polars_core; 3 | use polars_core::fmt::FloatFmt; 4 | use polars_core::prelude::IDX_DTYPE; 5 | use polars_core::POOL; 6 | 7 | use crate::conversion::Wrap; 8 | use crate::{RbResult, RbValueError}; 9 | 10 | pub fn get_index_type() -> Value { 11 | Wrap(IDX_DTYPE).into_value() 12 | } 13 | 14 | pub fn thread_pool_size() -> usize { 15 | POOL.current_num_threads() 16 | } 17 | 18 | pub fn set_float_fmt(fmt: String) -> RbResult<()> { 19 | let fmt = match fmt.as_str() { 20 | "full" => FloatFmt::Full, 21 | "mixed" => FloatFmt::Mixed, 22 | e => { 23 | return Err(RbValueError::new_err(format!( 24 | "fmt must be one of {{'full', 'mixed'}}, got {e}", 25 | ))) 26 | } 27 | }; 28 | polars_core::fmt::set_float_fmt(fmt); 29 | Ok(()) 30 | } 31 | 32 | pub fn get_float_fmt() -> RbResult { 33 | let strfmt = match polars_core::fmt::get_float_fmt() { 34 | FloatFmt::Full => "full", 35 | FloatFmt::Mixed => "mixed", 36 | }; 37 | Ok(strfmt.to_string()) 38 | } 39 | 40 | pub fn set_float_precision(precision: Option) -> RbResult<()> { 41 | use polars_core::fmt::set_float_precision; 
42 | set_float_precision(precision); 43 | Ok(()) 44 | } 45 | 46 | pub fn get_float_precision() -> RbResult> { 47 | use polars_core::fmt::get_float_precision; 48 | Ok(get_float_precision()) 49 | } 50 | 51 | pub fn set_thousands_separator(sep: Option) -> RbResult<()> { 52 | use polars_core::fmt::set_thousands_separator; 53 | set_thousands_separator(sep); 54 | Ok(()) 55 | } 56 | 57 | pub fn get_thousands_separator() -> RbResult> { 58 | use polars_core::fmt::get_thousands_separator; 59 | Ok(Some(get_thousands_separator())) 60 | } 61 | 62 | pub fn set_decimal_separator(sep: Option) -> RbResult<()> { 63 | use polars_core::fmt::set_decimal_separator; 64 | set_decimal_separator(sep); 65 | Ok(()) 66 | } 67 | 68 | pub fn get_decimal_separator() -> RbResult> { 69 | use polars_core::fmt::get_decimal_separator; 70 | Ok(Some(get_decimal_separator())) 71 | } 72 | 73 | pub fn set_trim_decimal_zeros(trim: Option) -> RbResult<()> { 74 | use polars_core::fmt::set_trim_decimal_zeros; 75 | set_trim_decimal_zeros(trim); 76 | Ok(()) 77 | } 78 | 79 | pub fn get_trim_decimal_zeros() -> RbResult> { 80 | use polars_core::fmt::get_trim_decimal_zeros; 81 | Ok(Some(get_trim_decimal_zeros())) 82 | } 83 | -------------------------------------------------------------------------------- /ext/polars/src/functions/misc.rs: -------------------------------------------------------------------------------- 1 | use crate::conversion::Wrap; 2 | use crate::prelude::DataType; 3 | use crate::RbResult; 4 | 5 | pub fn dtype_str_repr(dtype: Wrap) -> RbResult { 6 | let dtype = dtype.0; 7 | Ok(dtype.to_string()) 8 | } 9 | -------------------------------------------------------------------------------- /ext/polars/src/functions/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod aggregation; 2 | pub mod business; 3 | pub mod eager; 4 | pub mod io; 5 | pub mod lazy; 6 | pub mod meta; 7 | pub mod misc; 8 | pub mod random; 9 | pub mod range; 10 | pub mod string_cache; 
11 | pub mod whenthen; 12 | -------------------------------------------------------------------------------- /ext/polars/src/functions/random.rs: -------------------------------------------------------------------------------- 1 | use crate::RbResult; 2 | 3 | pub fn set_random_seed(seed: u64) -> RbResult<()> { 4 | polars_core::random::set_global_random_seed(seed); 5 | Ok(()) 6 | } 7 | -------------------------------------------------------------------------------- /ext/polars/src/functions/range.rs: -------------------------------------------------------------------------------- 1 | use polars::lazy::dsl; 2 | use polars_core::datatypes::{TimeUnit, TimeZone}; 3 | 4 | use crate::conversion::Wrap; 5 | use crate::prelude::*; 6 | use crate::RbExpr; 7 | 8 | pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap) -> RbExpr { 9 | let start = start.inner.clone(); 10 | let end = end.inner.clone(); 11 | let dtype = dtype.0; 12 | dsl::int_range(start, end, step, dtype).into() 13 | } 14 | 15 | pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap) -> RbExpr { 16 | let dtype = dtype.0; 17 | 18 | let mut result = dsl::int_ranges(start.inner.clone(), end.inner.clone(), step.inner.clone()); 19 | 20 | if dtype != DataType::Int64 { 21 | result = result.cast(DataType::List(Box::new(dtype))) 22 | } 23 | 24 | result.into() 25 | } 26 | 27 | pub fn date_range( 28 | start: &RbExpr, 29 | end: &RbExpr, 30 | interval: String, 31 | closed: Wrap, 32 | ) -> RbExpr { 33 | let start = start.inner.clone(); 34 | let end = end.inner.clone(); 35 | let interval = Duration::parse(&interval); 36 | let closed = closed.0; 37 | dsl::date_range(start, end, interval, closed).into() 38 | } 39 | 40 | pub fn date_ranges( 41 | start: &RbExpr, 42 | end: &RbExpr, 43 | interval: String, 44 | closed: Wrap, 45 | ) -> RbExpr { 46 | let start = start.inner.clone(); 47 | let end = end.inner.clone(); 48 | let interval = Duration::parse(&interval); 49 | let closed = closed.0; 50 | 
dsl::date_ranges(start, end, interval, closed).into() 51 | } 52 | 53 | pub fn datetime_range( 54 | start: &RbExpr, 55 | end: &RbExpr, 56 | every: String, 57 | closed: Wrap, 58 | time_unit: Option>, 59 | time_zone: Wrap>, 60 | ) -> RbExpr { 61 | let start = start.inner.clone(); 62 | let end = end.inner.clone(); 63 | let every = Duration::parse(&every); 64 | let closed = closed.0; 65 | let time_unit = time_unit.map(|x| x.0); 66 | let time_zone = time_zone.0; 67 | dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into() 68 | } 69 | 70 | pub fn datetime_ranges( 71 | start: &RbExpr, 72 | end: &RbExpr, 73 | every: String, 74 | closed: Wrap, 75 | time_unit: Option>, 76 | time_zone: Wrap>, 77 | ) -> RbExpr { 78 | let start = start.inner.clone(); 79 | let end = end.inner.clone(); 80 | let every = Duration::parse(&every); 81 | let closed = closed.0; 82 | let time_unit = time_unit.map(|x| x.0); 83 | let time_zone = time_zone.0; 84 | dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into() 85 | } 86 | 87 | pub fn time_range( 88 | start: &RbExpr, 89 | end: &RbExpr, 90 | every: String, 91 | closed: Wrap, 92 | ) -> RbExpr { 93 | let start = start.inner.clone(); 94 | let end = end.inner.clone(); 95 | let every = Duration::parse(&every); 96 | let closed = closed.0; 97 | dsl::time_range(start, end, every, closed).into() 98 | } 99 | 100 | pub fn time_ranges( 101 | start: &RbExpr, 102 | end: &RbExpr, 103 | every: String, 104 | closed: Wrap, 105 | ) -> RbExpr { 106 | let start = start.inner.clone(); 107 | let end = end.inner.clone(); 108 | let every = Duration::parse(&every); 109 | let closed = closed.0; 110 | dsl::time_ranges(start, end, every, closed).into() 111 | } 112 | -------------------------------------------------------------------------------- /ext/polars/src/functions/string_cache.rs: -------------------------------------------------------------------------------- 1 | use crate::RbResult; 2 | use magnus::{RArray, Ruby, Value}; 3 | use 
polars_core::StringCacheHolder; 4 | 5 | pub fn enable_string_cache() { 6 | polars_core::enable_string_cache() 7 | } 8 | 9 | pub fn disable_string_cache() { 10 | polars_core::disable_string_cache() 11 | } 12 | 13 | pub fn using_string_cache() -> bool { 14 | polars_core::using_string_cache() 15 | } 16 | 17 | #[magnus::wrap(class = "Polars::RbStringCacheHolder")] 18 | pub struct RbStringCacheHolder {} 19 | 20 | impl RbStringCacheHolder { 21 | pub fn hold() -> RbResult { 22 | let _hold = StringCacheHolder::hold(); 23 | Ruby::get().unwrap().yield_splat(RArray::new()) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /ext/polars/src/functions/whenthen.rs: -------------------------------------------------------------------------------- 1 | use polars::lazy::dsl; 2 | 3 | use crate::RbExpr; 4 | 5 | pub fn when(condition: &RbExpr) -> RbWhen { 6 | RbWhen { 7 | inner: dsl::when(condition.inner.clone()), 8 | } 9 | } 10 | 11 | #[magnus::wrap(class = "Polars::RbWhen")] 12 | #[derive(Clone)] 13 | pub struct RbWhen { 14 | pub inner: dsl::When, 15 | } 16 | 17 | #[magnus::wrap(class = "Polars::RbThen")] 18 | #[derive(Clone)] 19 | pub struct RbThen { 20 | pub inner: dsl::Then, 21 | } 22 | 23 | #[magnus::wrap(class = "Polars::RbChainedWhen")] 24 | #[derive(Clone)] 25 | pub struct RbChainedWhen { 26 | pub inner: dsl::ChainedWhen, 27 | } 28 | 29 | #[magnus::wrap(class = "Polars::RbChainedThen")] 30 | #[derive(Clone)] 31 | pub struct RbChainedThen { 32 | pub inner: dsl::ChainedThen, 33 | } 34 | 35 | impl RbWhen { 36 | pub fn then(&self, statement: &RbExpr) -> RbThen { 37 | RbThen { 38 | inner: self.inner.clone().then(statement.inner.clone()), 39 | } 40 | } 41 | } 42 | 43 | impl RbThen { 44 | pub fn when(&self, condition: &RbExpr) -> RbChainedWhen { 45 | RbChainedWhen { 46 | inner: self.inner.clone().when(condition.inner.clone()), 47 | } 48 | } 49 | 50 | pub fn otherwise(&self, statement: &RbExpr) -> RbExpr { 51 | 
self.inner.clone().otherwise(statement.inner.clone()).into() 52 | } 53 | } 54 | 55 | impl RbChainedWhen { 56 | pub fn then(&self, statement: &RbExpr) -> RbChainedThen { 57 | RbChainedThen { 58 | inner: self.inner.clone().then(statement.inner.clone()), 59 | } 60 | } 61 | } 62 | 63 | impl RbChainedThen { 64 | pub fn when(&self, condition: &RbExpr) -> RbChainedWhen { 65 | RbChainedWhen { 66 | inner: self.inner.clone().when(condition.inner.clone()), 67 | } 68 | } 69 | 70 | pub fn otherwise(&self, statement: &RbExpr) -> RbExpr { 71 | self.inner.clone().otherwise(statement.inner.clone()).into() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /ext/polars/src/interop/arrow/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod to_ruby; 2 | -------------------------------------------------------------------------------- /ext/polars/src/interop/arrow/to_ruby.rs: -------------------------------------------------------------------------------- 1 | use arrow::datatypes::ArrowDataType; 2 | use arrow::ffi; 3 | use magnus::{IntoValue, Value}; 4 | use polars::datatypes::CompatLevel; 5 | use polars::frame::DataFrame; 6 | use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt}; 7 | use polars::series::Series; 8 | use polars_core::utils::arrow; 9 | 10 | use crate::RbResult; 11 | 12 | #[magnus::wrap(class = "Polars::ArrowArrayStream")] 13 | pub struct RbArrowArrayStream { 14 | stream: ffi::ArrowArrayStream, 15 | } 16 | 17 | impl RbArrowArrayStream { 18 | pub fn to_i(&self) -> usize { 19 | (&self.stream as *const _) as usize 20 | } 21 | } 22 | 23 | pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult { 24 | let iter = Box::new(DataFrameStreamIterator::new(df)); 25 | let field = iter.field(); 26 | let stream = ffi::export_iterator(iter, field); 27 | Ok(RbArrowArrayStream { stream }.into_value()) 28 | } 29 | 30 | pub struct DataFrameStreamIterator { 31 | 
columns: Vec, 32 | dtype: ArrowDataType, 33 | idx: usize, 34 | n_chunks: usize, 35 | } 36 | 37 | impl DataFrameStreamIterator { 38 | fn new(df: &DataFrame) -> Self { 39 | let schema = df.schema().to_arrow(CompatLevel::newest()); 40 | let dtype = ArrowDataType::Struct(schema.into_iter_values().collect()); 41 | 42 | Self { 43 | columns: df 44 | .get_columns() 45 | .iter() 46 | .map(|v| v.as_materialized_series().clone()) 47 | .collect(), 48 | dtype, 49 | idx: 0, 50 | n_chunks: df.first_col_n_chunks(), 51 | } 52 | } 53 | 54 | fn field(&self) -> ArrowField { 55 | ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false) 56 | } 57 | } 58 | 59 | impl Iterator for DataFrameStreamIterator { 60 | type Item = PolarsResult; 61 | 62 | fn next(&mut self) -> Option { 63 | if self.idx >= self.n_chunks { 64 | None 65 | } else { 66 | // create a batch of the columns with the same chunk no. 67 | let batch_cols = self 68 | .columns 69 | .iter() 70 | .map(|s| s.to_arrow(self.idx, CompatLevel::newest())) 71 | .collect::>(); 72 | self.idx += 1; 73 | 74 | let array = arrow::array::StructArray::new( 75 | self.dtype.clone(), 76 | batch_cols[0].len(), 77 | batch_cols, 78 | None, 79 | ); 80 | Some(Ok(Box::new(array))) 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /ext/polars/src/interop/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod arrow; 2 | pub mod numo; 3 | -------------------------------------------------------------------------------- /ext/polars/src/interop/numo/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod numo_rs; 2 | pub mod to_numo_df; 3 | pub mod to_numo_series; 4 | -------------------------------------------------------------------------------- /ext/polars/src/interop/numo/numo_rs.rs: -------------------------------------------------------------------------------- 1 | use magnus::{class, prelude::*, 
IntoValue, Module, RArray, RClass, RModule, Value}; 2 | 3 | use crate::RbResult; 4 | 5 | pub trait Element: IntoValue { 6 | fn class_name() -> &'static str; 7 | } 8 | 9 | macro_rules! create_element { 10 | ($type:ty, $name:expr) => { 11 | impl Element for $type { 12 | fn class_name() -> &'static str { 13 | $name 14 | } 15 | } 16 | }; 17 | } 18 | 19 | create_element!(i8, "Int8"); 20 | create_element!(i16, "Int16"); 21 | create_element!(i32, "Int32"); 22 | create_element!(i64, "Int64"); 23 | create_element!(u8, "UInt8"); 24 | create_element!(u16, "UInt16"); 25 | create_element!(u32, "UInt32"); 26 | create_element!(u64, "UInt64"); 27 | create_element!(f32, "SFloat"); 28 | create_element!(f64, "DFloat"); 29 | create_element!(bool, "Bit"); 30 | 31 | impl Element for Option 32 | where 33 | Option: IntoValue, 34 | { 35 | fn class_name() -> &'static str { 36 | "RObject" 37 | } 38 | } 39 | 40 | pub struct RbArray1(T); 41 | 42 | impl RbArray1 { 43 | pub fn from_iter(values: I) -> RbResult 44 | where 45 | I: IntoIterator, 46 | { 47 | class::object() 48 | .const_get::<_, RModule>("Numo")? 49 | .const_get::<_, RClass>(T::class_name())? 
50 | .funcall("cast", (RArray::from_iter(values),)) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /ext/polars/src/interop/numo/to_numo_df.rs: -------------------------------------------------------------------------------- 1 | use magnus::Value; 2 | use polars_core::utils::try_get_supertype; 3 | 4 | use crate::dataframe::RbDataFrame; 5 | 6 | impl RbDataFrame { 7 | pub fn to_numo(&self) -> Option { 8 | let mut st = None; 9 | for s in self.df.borrow().iter() { 10 | let dt_i = s.dtype(); 11 | match st { 12 | None => st = Some(dt_i.clone()), 13 | Some(ref mut st) => { 14 | *st = try_get_supertype(st, dt_i).ok()?; 15 | } 16 | } 17 | } 18 | let _st = st?; 19 | 20 | // TODO 21 | None 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /ext/polars/src/interop/numo/to_numo_series.rs: -------------------------------------------------------------------------------- 1 | use magnus::Value; 2 | use num_traits::{Float, NumCast}; 3 | use polars_core::prelude::*; 4 | 5 | use super::numo_rs::{Element, RbArray1}; 6 | use crate::error::RbPolarsErr; 7 | use crate::raise_err; 8 | use crate::series::RbSeries; 9 | use crate::RbResult; 10 | 11 | impl RbSeries { 12 | /// Convert this Series to a Numo array. 13 | pub fn to_numo(&self) -> RbResult { 14 | series_to_numo(&self.series.borrow()) 15 | } 16 | } 17 | 18 | /// Convert a Series to a Numo array. 19 | fn series_to_numo(s: &Series) -> RbResult { 20 | series_to_numo_with_copy(s) 21 | } 22 | 23 | /// Convert a Series to a Numo array, copying data in the process. 
24 | fn series_to_numo_with_copy(s: &Series) -> RbResult { 25 | use DataType::*; 26 | match s.dtype() { 27 | Int8 => numeric_series_to_numpy::(s), 28 | Int16 => numeric_series_to_numpy::(s), 29 | Int32 => numeric_series_to_numpy::(s), 30 | Int64 => numeric_series_to_numpy::(s), 31 | UInt8 => numeric_series_to_numpy::(s), 32 | UInt16 => numeric_series_to_numpy::(s), 33 | UInt32 => numeric_series_to_numpy::(s), 34 | UInt64 => numeric_series_to_numpy::(s), 35 | Float32 => numeric_series_to_numpy::(s), 36 | Float64 => numeric_series_to_numpy::(s), 37 | Boolean => boolean_series_to_numo(s), 38 | String => { 39 | let ca = s.str().unwrap(); 40 | RbArray1::from_iter(ca) 41 | } 42 | dt => { 43 | raise_err!( 44 | format!("'to_numo' not supported for dtype: {dt:?}"), 45 | ComputeError 46 | ); 47 | } 48 | } 49 | } 50 | 51 | /// Convert numeric types to f32 or f64 with NaN representing a null value. 52 | fn numeric_series_to_numpy(s: &Series) -> RbResult 53 | where 54 | T: PolarsNumericType, 55 | T::Native: Element, 56 | U: Float + Element, 57 | { 58 | let ca: &ChunkedArray = s.as_ref().as_ref(); 59 | if s.null_count() == 0 { 60 | let values = ca.into_no_null_iter(); 61 | RbArray1::::from_iter(values) 62 | } else { 63 | let mapper = |opt_v: Option| match opt_v { 64 | Some(v) => NumCast::from(v).unwrap(), 65 | None => U::nan(), 66 | }; 67 | let values = ca.iter().map(mapper); 68 | RbArray1::from_iter(values) 69 | } 70 | } 71 | 72 | /// Convert booleans to bit if no nulls are present, otherwise convert to objects. 
73 | fn boolean_series_to_numo(s: &Series) -> RbResult { 74 | let ca = s.bool().unwrap(); 75 | if s.null_count() == 0 { 76 | let values = ca.into_no_null_iter(); 77 | RbArray1::::from_iter(values) 78 | } else { 79 | let values = ca.iter(); 80 | RbArray1::from_iter(values) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /ext/polars/src/lazyframe/mod.rs: -------------------------------------------------------------------------------- 1 | mod general; 2 | mod serde; 3 | mod sink; 4 | 5 | use polars::lazy::frame::LazyFrame; 6 | pub use sink::SinkTarget; 7 | use std::cell::RefCell; 8 | 9 | #[magnus::wrap(class = "Polars::RbLazyFrame")] 10 | #[derive(Clone)] 11 | pub struct RbLazyFrame { 12 | pub ldf: RefCell, 13 | } 14 | 15 | impl From for RbLazyFrame { 16 | fn from(ldf: LazyFrame) -> Self { 17 | RbLazyFrame { 18 | ldf: RefCell::new(ldf), 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /ext/polars/src/lazyframe/serde.rs: -------------------------------------------------------------------------------- 1 | use magnus::Value; 2 | use polars::lazy::frame::LazyFrame; 3 | use polars::prelude::*; 4 | use std::io::Read; 5 | 6 | use crate::file::get_file_like; 7 | use crate::{RbLazyFrame, RbResult, RbValueError}; 8 | 9 | impl RbLazyFrame { 10 | // TODO change to serialize_json 11 | pub fn read_json(rb_f: Value) -> RbResult { 12 | // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160 13 | // so don't bother with files. 14 | let mut json = String::new(); 15 | let _ = get_file_like(rb_f, false)? 16 | .read_to_string(&mut json) 17 | .unwrap(); 18 | 19 | // Safety 20 | // we skipped the serializing/deserializing of the static in lifetime in `DataType` 21 | // so we actually don't have a lifetime at all when serializing. 22 | 23 | // &str still has a lifetime. 
But it's ok, because we drop it immediately 24 | // in this scope 25 | let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) }; 26 | 27 | let lp = serde_json::from_str::(json) 28 | .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?; 29 | Ok(LazyFrame::from(lp).into()) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /ext/polars/src/lazyframe/sink.rs: -------------------------------------------------------------------------------- 1 | use std::path::{Path, PathBuf}; 2 | use std::sync::{Arc, Mutex}; 3 | 4 | use magnus::{RHash, TryConvert, Value}; 5 | use polars::prelude::sync_on_close::SyncOnCloseType; 6 | use polars::prelude::{SinkOptions, SpecialEq}; 7 | 8 | use crate::prelude::Wrap; 9 | use crate::{RbResult, RbValueError}; 10 | 11 | #[derive(Clone)] 12 | pub enum SinkTarget { 13 | File(polars_plan::dsl::SinkTarget), 14 | } 15 | 16 | impl TryConvert for Wrap { 17 | fn try_convert(ob: Value) -> RbResult { 18 | if let Ok(v) = PathBuf::try_convert(ob) { 19 | Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v)))) 20 | } else { 21 | let writer = { 22 | let rb_f = ob; 23 | RbResult::Ok(crate::file::try_get_rbfile(rb_f, true)?.0.into_writeable()) 24 | }?; 25 | 26 | Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new( 27 | Arc::new(Mutex::new(Some(writer))), 28 | )))) 29 | } 30 | } 31 | } 32 | 33 | impl TryConvert for SinkTarget { 34 | fn try_convert(ob: Value) -> RbResult { 35 | Ok(Self::File( 36 | >::try_convert(ob)?.0, 37 | )) 38 | } 39 | } 40 | 41 | impl SinkTarget { 42 | pub fn base_path(&self) -> Option<&Path> { 43 | match self { 44 | Self::File(t) => match t { 45 | polars::prelude::SinkTarget::Path(p) => Some(p.as_path()), 46 | polars::prelude::SinkTarget::Dyn(_) => None, 47 | }, 48 | } 49 | } 50 | } 51 | 52 | impl TryConvert for Wrap { 53 | fn try_convert(ob: Value) -> RbResult { 54 | let parsed = match String::try_convert(ob)?.as_str() { 55 | "none" => 
SyncOnCloseType::None, 56 | "data" => SyncOnCloseType::Data, 57 | "all" => SyncOnCloseType::All, 58 | v => { 59 | return Err(RbValueError::new_err(format!( 60 | "`sync_on_close` must be one of {{'none', 'data', 'all'}}, got {v}", 61 | ))); 62 | } 63 | }; 64 | Ok(Wrap(parsed)) 65 | } 66 | } 67 | 68 | impl TryConvert for Wrap { 69 | fn try_convert(ob: Value) -> RbResult { 70 | let parsed = RHash::try_convert(ob)?; 71 | 72 | if parsed.len() != 3 { 73 | return Err(RbValueError::new_err( 74 | "`sink_options` must be a dictionary with the exactly 3 field.", 75 | )); 76 | } 77 | 78 | let sync_on_close = parsed.get("sync_on_close").ok_or_else(|| { 79 | RbValueError::new_err("`sink_options` must contain `sync_on_close` field") 80 | })?; 81 | let sync_on_close = Wrap::::try_convert(sync_on_close)?.0; 82 | 83 | let maintain_order = parsed.get("maintain_order").ok_or_else(|| { 84 | RbValueError::new_err("`sink_options` must contain `maintain_order` field") 85 | })?; 86 | let maintain_order = bool::try_convert(maintain_order)?; 87 | 88 | let mkdir = parsed 89 | .get("mkdir") 90 | .ok_or_else(|| RbValueError::new_err("`sink_options` must contain `mkdir` field"))?; 91 | let mkdir = bool::try_convert(mkdir)?; 92 | 93 | Ok(Wrap(SinkOptions { 94 | sync_on_close, 95 | maintain_order, 96 | mkdir, 97 | })) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /ext/polars/src/lazygroupby.rs: -------------------------------------------------------------------------------- 1 | use magnus::RArray; 2 | use polars::lazy::frame::LazyGroupBy; 3 | use std::cell::RefCell; 4 | 5 | use crate::expr::rb_exprs_to_exprs; 6 | use crate::{RbLazyFrame, RbResult}; 7 | 8 | #[magnus::wrap(class = "Polars::RbLazyGroupBy")] 9 | pub struct RbLazyGroupBy { 10 | pub lgb: RefCell>, 11 | } 12 | 13 | impl RbLazyGroupBy { 14 | pub fn agg(&self, aggs: RArray) -> RbResult { 15 | let lgb = self.lgb.borrow_mut().take().unwrap(); 16 | let aggs = rb_exprs_to_exprs(aggs)?; 17 
| Ok(lgb.agg(aggs).into()) 18 | } 19 | 20 | pub fn head(&self, n: usize) -> RbLazyFrame { 21 | let lgb = self.lgb.take().unwrap(); 22 | lgb.head(Some(n)).into() 23 | } 24 | 25 | pub fn tail(&self, n: usize) -> RbLazyFrame { 26 | let lgb = self.lgb.take().unwrap(); 27 | lgb.tail(Some(n)).into() 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /ext/polars/src/map/lazy.rs: -------------------------------------------------------------------------------- 1 | use magnus::{prelude::*, RArray, Value}; 2 | use polars::prelude::*; 3 | 4 | use crate::rb_modules::*; 5 | use crate::{RbExpr, RbSeries, Wrap}; 6 | 7 | fn to_series(v: Value, name: &str) -> PolarsResult { 8 | let rb_rbseries = match v.funcall("_s", ()) { 9 | Ok(s) => s, 10 | // the lambda did not return a series, we try to create a new Ruby Series 11 | _ => { 12 | let data = RArray::new(); 13 | data.push(v).unwrap(); 14 | let res = series().funcall::<_, _, Value>("new", (name.to_string(), data)); 15 | 16 | match res { 17 | Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(), 18 | Err(_) => { 19 | polars_bail!(ComputeError: 20 | "expected a something that could convert to a `Series` but got: {}", 21 | unsafe { v.classname() } 22 | ) 23 | } 24 | } 25 | } 26 | }; 27 | // Finally get the actual Series 28 | Ok(rb_rbseries.series.borrow().clone()) 29 | } 30 | 31 | pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult> { 32 | // create a RbSeries struct/object for Ruby 33 | let rbseries_a = RbSeries::new(a); 34 | let rbseries_b = RbSeries::new(b); 35 | 36 | // Wrap this RbSeries object in the Ruby side Series wrapper 37 | let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap(); 38 | let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap(); 39 | 40 | // call the lambda and get a Ruby side Series wrapper 41 | let result_series_wrapper: Value = 42 | match lambda.funcall("call", 
(ruby_series_wrapper_a, ruby_series_wrapper_b)) { 43 | Ok(rbobj) => rbobj, 44 | Err(e) => polars_bail!( 45 | ComputeError: "custom Ruby function failed: {}", e, 46 | ), 47 | }; 48 | let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ()) 49 | { 50 | let expr = rbexpr.inner.clone(); 51 | let df = DataFrame::empty(); 52 | let out = df 53 | .lazy() 54 | .select([expr]) 55 | .with_predicate_pushdown(false) 56 | .with_projection_pushdown(false) 57 | .collect()?; 58 | 59 | let s = out.select_at_idx(0).unwrap().clone(); 60 | RbSeries::new(s.take_materialized_series()) 61 | } else { 62 | return Some(to_series(result_series_wrapper, "")).transpose(); 63 | }; 64 | 65 | // Finally get the actual Series 66 | let binding = rbseries.series.borrow(); 67 | Ok(Some(binding.clone())) 68 | } 69 | 70 | pub fn map_single( 71 | _rbexpr: &RbExpr, 72 | _lambda: Value, 73 | _output_type: Option>, 74 | _agg_list: bool, 75 | _is_elementwise: bool, 76 | _returns_scalar: bool, 77 | ) -> RbExpr { 78 | todo!(); 79 | } 80 | -------------------------------------------------------------------------------- /ext/polars/src/object.rs: -------------------------------------------------------------------------------- 1 | pub(crate) const OBJECT_NAME: &str = "object"; 2 | -------------------------------------------------------------------------------- /ext/polars/src/on_startup.rs: -------------------------------------------------------------------------------- 1 | use std::any::Any; 2 | use std::sync::Arc; 3 | use std::sync::OnceLock; 4 | 5 | use magnus::IntoValue; 6 | use polars::prelude::*; 7 | use polars_core::chunked_array::object::builder::ObjectChunkedBuilder; 8 | use polars_core::chunked_array::object::registry; 9 | use polars_core::chunked_array::object::registry::AnonymousObjectBuilder; 10 | use polars_core::prelude::AnyValue; 11 | 12 | use crate::prelude::ObjectValue; 13 | use crate::Wrap; 14 | 15 | static POLARS_REGISTRY_INIT_LOCK: OnceLock<()> = 
OnceLock::new(); 16 | 17 | pub(crate) fn register_startup_deps() { 18 | POLARS_REGISTRY_INIT_LOCK.get_or_init(|| { 19 | let object_builder = Box::new(|name: PlSmallStr, capacity: usize| { 20 | Box::new(ObjectChunkedBuilder::::new(name, capacity)) 21 | as Box 22 | }); 23 | 24 | let object_converter = Arc::new(|av: AnyValue| { 25 | let object = ObjectValue { 26 | inner: Wrap(av).into_value().into(), 27 | }; 28 | Box::new(object) as Box 29 | }); 30 | let rbobject_converter = Arc::new(|av: AnyValue| { 31 | let object = Wrap(av).into_value(); 32 | Box::new(object) as Box 33 | }); 34 | 35 | let object_size = std::mem::size_of::(); 36 | let physical_dtype = ArrowDataType::FixedSizeBinary(object_size); 37 | registry::register_object_builder( 38 | object_builder, 39 | object_converter, 40 | rbobject_converter, 41 | physical_dtype, 42 | ) 43 | }); 44 | } 45 | -------------------------------------------------------------------------------- /ext/polars/src/prelude.rs: -------------------------------------------------------------------------------- 1 | pub use polars::prelude::*; 2 | 3 | pub use crate::conversion::*; 4 | -------------------------------------------------------------------------------- /ext/polars/src/rb_modules.rs: -------------------------------------------------------------------------------- 1 | use magnus::{value::Lazy, ExceptionClass, Module, RClass, RModule, Ruby}; 2 | 3 | static POLARS: Lazy = Lazy::new(|ruby| ruby.class_object().const_get("Polars").unwrap()); 4 | 5 | pub(crate) fn polars() -> RModule { 6 | Ruby::get().unwrap().get_inner(&POLARS) 7 | } 8 | 9 | static SERIES: Lazy = 10 | Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Series").unwrap()); 11 | 12 | pub(crate) fn series() -> RClass { 13 | Ruby::get().unwrap().get_inner(&SERIES) 14 | } 15 | 16 | static UTILS: Lazy = Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Utils").unwrap()); 17 | 18 | pub(crate) fn utils() -> RModule { 19 | Ruby::get().unwrap().get_inner(&UTILS) 20 | } 21 | 22 
| static BIGDECIMAL: Lazy = 23 | Lazy::new(|ruby| ruby.class_object().const_get("BigDecimal").unwrap()); 24 | 25 | pub(crate) fn bigdecimal() -> RClass { 26 | Ruby::get().unwrap().get_inner(&BIGDECIMAL) 27 | } 28 | 29 | static DATE: Lazy = Lazy::new(|ruby| ruby.class_object().const_get("Date").unwrap()); 30 | 31 | pub(crate) fn date() -> RClass { 32 | Ruby::get().unwrap().get_inner(&DATE) 33 | } 34 | 35 | static DATETIME: Lazy = 36 | Lazy::new(|ruby| ruby.class_object().const_get("DateTime").unwrap()); 37 | 38 | pub(crate) fn datetime() -> RClass { 39 | Ruby::get().unwrap().get_inner(&DATETIME) 40 | } 41 | 42 | static ERROR: Lazy = 43 | Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Error").unwrap()); 44 | 45 | pub(crate) fn error() -> ExceptionClass { 46 | Ruby::get().unwrap().get_inner(&ERROR) 47 | } 48 | 49 | static COMPUTE_ERROR: Lazy = 50 | Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("ComputeError").unwrap()); 51 | 52 | pub(crate) fn compute_error() -> ExceptionClass { 53 | Ruby::get().unwrap().get_inner(&COMPUTE_ERROR) 54 | } 55 | 56 | static INVALID_OPERATION_ERROR: Lazy = Lazy::new(|ruby| { 57 | ruby.get_inner(&POLARS) 58 | .const_get("InvalidOperationError") 59 | .unwrap() 60 | }); 61 | 62 | pub(crate) fn invalid_operation_error() -> ExceptionClass { 63 | Ruby::get().unwrap().get_inner(&INVALID_OPERATION_ERROR) 64 | } 65 | -------------------------------------------------------------------------------- /ext/polars/src/series/aggregation.rs: -------------------------------------------------------------------------------- 1 | use crate::error::RbPolarsErr; 2 | use crate::prelude::*; 3 | use crate::{RbResult, RbSeries}; 4 | use magnus::{IntoValue, Value}; 5 | 6 | impl RbSeries { 7 | pub fn any(&self, ignore_nulls: bool) -> RbResult> { 8 | let binding = self.series.borrow(); 9 | let s = binding.bool().map_err(RbPolarsErr::from)?; 10 | Ok(if ignore_nulls { 11 | Some(s.any()) 12 | } else { 13 | s.any_kleene() 14 | }) 15 | } 16 | 17 | pub fn 
all(&self, ignore_nulls: bool) -> RbResult> { 18 | let binding = self.series.borrow(); 19 | let s = binding.bool().map_err(RbPolarsErr::from)?; 20 | Ok(if ignore_nulls { 21 | Some(s.all()) 22 | } else { 23 | s.all_kleene() 24 | }) 25 | } 26 | 27 | pub fn arg_max(&self) -> Option { 28 | self.series.borrow().arg_max() 29 | } 30 | 31 | pub fn arg_min(&self) -> Option { 32 | self.series.borrow().arg_min() 33 | } 34 | 35 | pub fn max(&self) -> RbResult { 36 | Ok(Wrap( 37 | self.series 38 | .borrow() 39 | .max_reduce() 40 | .map_err(RbPolarsErr::from)? 41 | .as_any_value(), 42 | ) 43 | .into_value()) 44 | } 45 | 46 | pub fn mean(&self) -> RbResult { 47 | match self.series.borrow().dtype() { 48 | DataType::Boolean => Ok(Wrap( 49 | self.series 50 | .borrow() 51 | .cast(&DataType::UInt8) 52 | .unwrap() 53 | .mean_reduce() 54 | .as_any_value(), 55 | ) 56 | .into_value()), 57 | // For non-numeric output types we require mean_reduce. 58 | dt if dt.is_temporal() => { 59 | Ok(Wrap(self.series.borrow().mean_reduce().as_any_value()).into_value()) 60 | } 61 | _ => Ok(self.series.borrow().mean().into_value()), 62 | } 63 | } 64 | 65 | pub fn median(&self) -> RbResult { 66 | match self.series.borrow().dtype() { 67 | DataType::Boolean => Ok(Wrap( 68 | self.series 69 | .borrow() 70 | .cast(&DataType::UInt8) 71 | .unwrap() 72 | .median_reduce() 73 | .map_err(RbPolarsErr::from)? 74 | .as_any_value(), 75 | ) 76 | .into_value()), 77 | // For non-numeric output types we require median_reduce. 78 | dt if dt.is_temporal() => Ok(Wrap( 79 | self.series 80 | .borrow() 81 | .median_reduce() 82 | .map_err(RbPolarsErr::from)? 83 | .as_any_value(), 84 | ) 85 | .into_value()), 86 | _ => Ok(self.series.borrow().median().into_value()), 87 | } 88 | } 89 | 90 | pub fn min(&self) -> RbResult { 91 | Ok(Wrap( 92 | self.series 93 | .borrow() 94 | .min_reduce() 95 | .map_err(RbPolarsErr::from)? 
96 | .as_any_value(), 97 | ) 98 | .into_value()) 99 | } 100 | 101 | pub fn quantile(&self, quantile: f64, interpolation: Wrap) -> RbResult { 102 | let bind = self 103 | .series 104 | .borrow() 105 | .quantile_reduce(quantile, interpolation.0); 106 | let sc = bind.map_err(RbPolarsErr::from)?; 107 | 108 | Ok(Wrap(sc.as_any_value()).into_value()) 109 | } 110 | 111 | pub fn sum(&self) -> RbResult { 112 | Ok(Wrap( 113 | self.series 114 | .borrow() 115 | .sum_reduce() 116 | .map_err(RbPolarsErr::from)? 117 | .as_any_value(), 118 | ) 119 | .into_value()) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /ext/polars/src/series/arithmetic.rs: -------------------------------------------------------------------------------- 1 | use crate::{RbPolarsErr, RbResult, RbSeries}; 2 | 3 | impl RbSeries { 4 | pub fn add(&self, other: &RbSeries) -> RbResult { 5 | Ok((&*self.series.borrow() + &*other.series.borrow()) 6 | .map(Into::into) 7 | .map_err(RbPolarsErr::from)?) 8 | } 9 | 10 | pub fn sub(&self, other: &RbSeries) -> RbResult { 11 | Ok((&*self.series.borrow() - &*other.series.borrow()) 12 | .map(Into::into) 13 | .map_err(RbPolarsErr::from)?) 14 | } 15 | 16 | pub fn mul(&self, other: &RbSeries) -> RbResult { 17 | Ok((&*self.series.borrow() * &*other.series.borrow()) 18 | .map(Into::into) 19 | .map_err(RbPolarsErr::from)?) 20 | } 21 | 22 | pub fn div(&self, other: &RbSeries) -> RbResult { 23 | Ok((&*self.series.borrow() / &*other.series.borrow()) 24 | .map(Into::into) 25 | .map_err(RbPolarsErr::from)?) 26 | } 27 | 28 | pub fn rem(&self, other: &RbSeries) -> RbResult { 29 | Ok((&*self.series.borrow() % &*other.series.borrow()) 30 | .map(Into::into) 31 | .map_err(RbPolarsErr::from)?) 32 | } 33 | } 34 | 35 | macro_rules! 
impl_arithmetic { 36 | ($name:ident, $type:ty, $operand:tt) => { 37 | impl RbSeries { 38 | pub fn $name(&self, other: $type) -> RbResult { 39 | Ok(RbSeries::new(&*self.series.borrow() $operand other)) 40 | } 41 | } 42 | }; 43 | } 44 | 45 | impl_arithmetic!(add_u8, u8, +); 46 | impl_arithmetic!(add_u16, u16, +); 47 | impl_arithmetic!(add_u32, u32, +); 48 | impl_arithmetic!(add_u64, u64, +); 49 | impl_arithmetic!(add_i8, i8, +); 50 | impl_arithmetic!(add_i16, i16, +); 51 | impl_arithmetic!(add_i32, i32, +); 52 | impl_arithmetic!(add_i64, i64, +); 53 | impl_arithmetic!(add_datetime, i64, +); 54 | impl_arithmetic!(add_duration, i64, +); 55 | impl_arithmetic!(add_f32, f32, +); 56 | impl_arithmetic!(add_f64, f64, +); 57 | impl_arithmetic!(sub_u8, u8, -); 58 | impl_arithmetic!(sub_u16, u16, -); 59 | impl_arithmetic!(sub_u32, u32, -); 60 | impl_arithmetic!(sub_u64, u64, -); 61 | impl_arithmetic!(sub_i8, i8, -); 62 | impl_arithmetic!(sub_i16, i16, -); 63 | impl_arithmetic!(sub_i32, i32, -); 64 | impl_arithmetic!(sub_i64, i64, -); 65 | impl_arithmetic!(sub_datetime, i64, -); 66 | impl_arithmetic!(sub_duration, i64, -); 67 | impl_arithmetic!(sub_f32, f32, -); 68 | impl_arithmetic!(sub_f64, f64, -); 69 | impl_arithmetic!(div_u8, u8, /); 70 | impl_arithmetic!(div_u16, u16, /); 71 | impl_arithmetic!(div_u32, u32, /); 72 | impl_arithmetic!(div_u64, u64, /); 73 | impl_arithmetic!(div_i8, i8, /); 74 | impl_arithmetic!(div_i16, i16, /); 75 | impl_arithmetic!(div_i32, i32, /); 76 | impl_arithmetic!(div_i64, i64, /); 77 | impl_arithmetic!(div_f32, f32, /); 78 | impl_arithmetic!(div_f64, f64, /); 79 | impl_arithmetic!(mul_u8, u8, *); 80 | impl_arithmetic!(mul_u16, u16, *); 81 | impl_arithmetic!(mul_u32, u32, *); 82 | impl_arithmetic!(mul_u64, u64, *); 83 | impl_arithmetic!(mul_i8, i8, *); 84 | impl_arithmetic!(mul_i16, i16, *); 85 | impl_arithmetic!(mul_i32, i32, *); 86 | impl_arithmetic!(mul_i64, i64, *); 87 | impl_arithmetic!(mul_f32, f32, *); 88 | impl_arithmetic!(mul_f64, f64, *); 
89 | impl_arithmetic!(rem_u8, u8, %); 90 | impl_arithmetic!(rem_u16, u16, %); 91 | impl_arithmetic!(rem_u32, u32, %); 92 | impl_arithmetic!(rem_u64, u64, %); 93 | impl_arithmetic!(rem_i8, i8, %); 94 | impl_arithmetic!(rem_i16, i16, %); 95 | impl_arithmetic!(rem_i32, i32, %); 96 | impl_arithmetic!(rem_i64, i64, %); 97 | impl_arithmetic!(rem_f32, f32, %); 98 | impl_arithmetic!(rem_f64, f64, %); 99 | -------------------------------------------------------------------------------- /ext/polars/src/series/import.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::Array; 2 | use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader}; 3 | use magnus::prelude::*; 4 | use magnus::Value; 5 | use polars::prelude::*; 6 | 7 | use super::RbSeries; 8 | 9 | use crate::exceptions::RbValueError; 10 | use crate::RbResult; 11 | 12 | /// Import `arrow_c_stream` across Ruby boundary. 13 | fn call_arrow_c_stream(ob: Value) -> RbResult { 14 | let capsule = ob.funcall("arrow_c_stream", ())?; 15 | Ok(capsule) 16 | } 17 | 18 | pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult { 19 | let capsule_pointer: usize = capsule.funcall("to_i", ())?; 20 | 21 | // # Safety 22 | // capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule 23 | // Interface 24 | let mut stream = unsafe { 25 | // Takes ownership of the pointed to ArrowArrayStream 26 | // This acts to move the data out of the capsule pointer, setting the release callback to NULL 27 | let stream_ptr = Box::new(std::ptr::replace( 28 | capsule_pointer as _, 29 | ArrowArrayStream::empty(), 30 | )); 31 | ArrowArrayStreamReader::try_new(stream_ptr) 32 | .map_err(|err| RbValueError::new_err(err.to_string()))? 
33 | }; 34 | 35 | let mut produced_arrays: Vec> = vec![]; 36 | while let Some(array) = unsafe { stream.next() } { 37 | produced_arrays.push(array.unwrap()); 38 | } 39 | 40 | // Series::try_from fails for an empty vec of chunks 41 | let s = if produced_arrays.is_empty() { 42 | let polars_dt = DataType::from_arrow_field(stream.field()); 43 | Series::new_empty(stream.field().name.clone(), &polars_dt) 44 | } else { 45 | Series::try_from((stream.field(), produced_arrays)).unwrap() 46 | }; 47 | Ok(RbSeries::new(s)) 48 | } 49 | 50 | impl RbSeries { 51 | pub fn from_arrow_c_stream(ob: Value) -> RbResult { 52 | let capsule = call_arrow_c_stream(ob)?; 53 | import_stream_rbcapsule(capsule) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /ext/polars/src/series/mod.rs: -------------------------------------------------------------------------------- 1 | mod aggregation; 2 | mod arithmetic; 3 | mod comparison; 4 | mod construction; 5 | mod export; 6 | mod general; 7 | mod import; 8 | mod scatter; 9 | 10 | use magnus::{prelude::*, RArray}; 11 | use polars::prelude::*; 12 | use std::cell::RefCell; 13 | 14 | use crate::RbResult; 15 | 16 | #[magnus::wrap(class = "Polars::RbSeries")] 17 | pub struct RbSeries { 18 | pub series: RefCell, 19 | } 20 | 21 | impl From for RbSeries { 22 | fn from(series: Series) -> Self { 23 | RbSeries::new(series) 24 | } 25 | } 26 | 27 | impl RbSeries { 28 | pub fn new(series: Series) -> Self { 29 | RbSeries { 30 | series: RefCell::new(series), 31 | } 32 | } 33 | } 34 | 35 | pub fn to_series(rs: RArray) -> RbResult> { 36 | let mut series = Vec::new(); 37 | for item in rs.into_iter() { 38 | series.push(<&RbSeries>::try_convert(item)?.series.borrow().clone()); 39 | } 40 | Ok(series) 41 | } 42 | 43 | pub fn to_rbseries(s: Vec) -> RArray { 44 | RArray::from_iter( 45 | s.into_iter() 46 | .map(|c| c.take_materialized_series()) 47 | .map(RbSeries::new), 48 | ) 49 | } 50 | 
-------------------------------------------------------------------------------- /ext/polars/src/series/scatter.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::Array; 2 | use polars::prelude::*; 3 | 4 | use crate::error::RbPolarsErr; 5 | use crate::{RbErr, RbResult, RbSeries}; 6 | 7 | impl RbSeries { 8 | pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> { 9 | let mut s = self.series.borrow_mut(); 10 | match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) { 11 | Ok(out) => { 12 | *s = out; 13 | Ok(()) 14 | } 15 | Err(e) => Err(RbErr::from(RbPolarsErr::from(e))), 16 | } 17 | } 18 | } 19 | 20 | fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult { 21 | let logical_dtype = s.dtype().clone(); 22 | let idx = idx.cast(&IDX_DTYPE)?; 23 | let idx = idx.rechunk(); 24 | let idx = idx.idx().unwrap(); 25 | let idx = idx.downcast_iter().next().unwrap(); 26 | 27 | if idx.null_count() > 0 { 28 | return Err(PolarsError::ComputeError( 29 | "index values should not be null".into(), 30 | )); 31 | } 32 | 33 | let idx = idx.values().as_slice(); 34 | 35 | let values = values.to_physical_repr().cast(&s.dtype().to_physical())?; 36 | 37 | // do not shadow, otherwise s is not dropped immediately 38 | // and we want to have mutable access 39 | s = s.to_physical_repr().into_owned(); 40 | let mutable_s = s._get_inner_mut(); 41 | 42 | let s = match logical_dtype.to_physical() { 43 | DataType::Int8 => { 44 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 45 | let values = values.i8()?; 46 | std::mem::take(ca).scatter(idx, values) 47 | } 48 | DataType::Int16 => { 49 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 50 | let values = values.i16()?; 51 | std::mem::take(ca).scatter(idx, values) 52 | } 53 | DataType::Int32 => { 54 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 55 | let values = values.i32()?; 56 | std::mem::take(ca).scatter(idx, values) 57 | } 58 | 
DataType::Int64 => { 59 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 60 | let values = values.i64()?; 61 | std::mem::take(ca).scatter(idx, values) 62 | } 63 | DataType::UInt8 => { 64 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 65 | let values = values.u8()?; 66 | std::mem::take(ca).scatter(idx, values) 67 | } 68 | DataType::UInt16 => { 69 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 70 | let values = values.u16()?; 71 | std::mem::take(ca).scatter(idx, values) 72 | } 73 | DataType::UInt32 => { 74 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 75 | let values = values.u32()?; 76 | std::mem::take(ca).scatter(idx, values) 77 | } 78 | DataType::UInt64 => { 79 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 80 | let values = values.u64()?; 81 | std::mem::take(ca).scatter(idx, values) 82 | } 83 | DataType::Float32 => { 84 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 85 | let values = values.f32()?; 86 | std::mem::take(ca).scatter(idx, values) 87 | } 88 | DataType::Float64 => { 89 | let ca: &mut ChunkedArray = mutable_s.as_mut(); 90 | let values = values.f64()?; 91 | std::mem::take(ca).scatter(idx, values) 92 | } 93 | DataType::Boolean => { 94 | let ca = s.bool()?; 95 | let values = values.bool()?; 96 | ca.scatter(idx, values) 97 | } 98 | DataType::String => { 99 | let ca = s.str()?; 100 | let values = values.str()?; 101 | ca.scatter(idx, values) 102 | } 103 | _ => panic!("not yet implemented for dtype: {}", logical_dtype), 104 | }; 105 | 106 | s.and_then(|s| s.cast(&logical_dtype)) 107 | } 108 | -------------------------------------------------------------------------------- /ext/polars/src/sql.rs: -------------------------------------------------------------------------------- 1 | use polars::sql::SQLContext; 2 | use std::cell::RefCell; 3 | 4 | use crate::{RbLazyFrame, RbPolarsErr, RbResult}; 5 | 6 | #[magnus::wrap(class = "Polars::RbSQLContext")] 7 | #[repr(transparent)] 8 | #[derive(Clone)] 9 | pub struct RbSQLContext { 10 | pub context: 
RefCell, 11 | } 12 | 13 | #[allow( 14 | clippy::wrong_self_convention, 15 | clippy::should_implement_trait, 16 | clippy::len_without_is_empty 17 | )] 18 | impl RbSQLContext { 19 | #[allow(clippy::new_without_default)] 20 | pub fn new() -> RbSQLContext { 21 | RbSQLContext { 22 | context: SQLContext::new().into(), 23 | } 24 | } 25 | 26 | pub fn execute(&self, query: String) -> RbResult { 27 | Ok(self 28 | .context 29 | .borrow_mut() 30 | .execute(&query) 31 | .map_err(RbPolarsErr::from)? 32 | .into()) 33 | } 34 | 35 | pub fn get_tables(&self) -> RbResult> { 36 | Ok(self.context.borrow().get_tables()) 37 | } 38 | 39 | pub fn register(&self, name: String, lf: &RbLazyFrame) { 40 | self.context 41 | .borrow_mut() 42 | .register(&name, lf.ldf.borrow().clone()) 43 | } 44 | 45 | pub fn unregister(&self, name: String) { 46 | self.context.borrow_mut().unregister(&name) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /ext/polars/src/utils.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! 
apply_method_all_arrow_series2 { 3 | ($self:expr, $method:ident, $($args:expr),*) => { 4 | match $self.dtype() { 5 | DataType::Boolean => $self.bool().unwrap().$method($($args),*), 6 | DataType::String => $self.str().unwrap().$method($($args),*), 7 | DataType::UInt8 => $self.u8().unwrap().$method($($args),*), 8 | DataType::UInt16 => $self.u16().unwrap().$method($($args),*), 9 | DataType::UInt32 => $self.u32().unwrap().$method($($args),*), 10 | DataType::UInt64 => $self.u64().unwrap().$method($($args),*), 11 | DataType::Int8 => $self.i8().unwrap().$method($($args),*), 12 | DataType::Int16 => $self.i16().unwrap().$method($($args),*), 13 | DataType::Int32 => $self.i32().unwrap().$method($($args),*), 14 | DataType::Int64 => $self.i64().unwrap().$method($($args),*), 15 | DataType::Float32 => $self.f32().unwrap().$method($($args),*), 16 | DataType::Float64 => $self.f64().unwrap().$method($($args),*), 17 | DataType::Date => $self.date().unwrap().$method($($args),*), 18 | DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*), 19 | // DataType::List(_) => $self.list().unwrap().$method($($args),*), 20 | DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*), 21 | dt => panic!("dtype {:?} not supported", dt) 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /lib/polars-df.rb: -------------------------------------------------------------------------------- 1 | require_relative "polars" 2 | -------------------------------------------------------------------------------- /lib/polars.rb: -------------------------------------------------------------------------------- 1 | # ext 2 | begin 3 | require "polars/#{RUBY_VERSION.to_f}/polars" 4 | rescue LoadError 5 | require "polars/polars" 6 | end 7 | 8 | # stdlib 9 | require "bigdecimal" 10 | require "date" 11 | require "stringio" 12 | 13 | # modules 14 | require_relative "polars/expr_dispatch" 15 | require_relative "polars/array_expr" 16 | 
require_relative "polars/array_name_space" 17 | require_relative "polars/batched_csv_reader" 18 | require_relative "polars/binary_expr" 19 | require_relative "polars/binary_name_space" 20 | require_relative "polars/cat_expr" 21 | require_relative "polars/cat_name_space" 22 | require_relative "polars/config" 23 | require_relative "polars/convert" 24 | require_relative "polars/plot" 25 | require_relative "polars/data_frame" 26 | require_relative "polars/data_types" 27 | require_relative "polars/data_type_group" 28 | require_relative "polars/date_time_expr" 29 | require_relative "polars/date_time_name_space" 30 | require_relative "polars/dynamic_group_by" 31 | require_relative "polars/exceptions" 32 | require_relative "polars/expr" 33 | require_relative "polars/functions/as_datatype" 34 | require_relative "polars/functions/col" 35 | require_relative "polars/functions/eager" 36 | require_relative "polars/functions/lazy" 37 | require_relative "polars/functions/len" 38 | require_relative "polars/functions/lit" 39 | require_relative "polars/functions/random" 40 | require_relative "polars/functions/repeat" 41 | require_relative "polars/functions/whenthen" 42 | require_relative "polars/functions/aggregation/horizontal" 43 | require_relative "polars/functions/aggregation/vertical" 44 | require_relative "polars/functions/range/date_range" 45 | require_relative "polars/functions/range/datetime_range" 46 | require_relative "polars/functions/range/int_range" 47 | require_relative "polars/functions/range/time_range" 48 | require_relative "polars/group_by" 49 | require_relative "polars/io/avro" 50 | require_relative "polars/io/csv" 51 | require_relative "polars/io/database" 52 | require_relative "polars/io/delta" 53 | require_relative "polars/io/ipc" 54 | require_relative "polars/io/json" 55 | require_relative "polars/io/ndjson" 56 | require_relative "polars/io/parquet" 57 | require_relative "polars/lazy_frame" 58 | require_relative "polars/lazy_group_by" 59 | require_relative 
"polars/list_expr" 60 | require_relative "polars/list_name_space" 61 | require_relative "polars/meta_expr" 62 | require_relative "polars/name_expr" 63 | require_relative "polars/rolling_group_by" 64 | require_relative "polars/schema" 65 | require_relative "polars/selectors" 66 | require_relative "polars/series" 67 | require_relative "polars/slice" 68 | require_relative "polars/sql_context" 69 | require_relative "polars/string_cache" 70 | require_relative "polars/string_expr" 71 | require_relative "polars/string_name_space" 72 | require_relative "polars/struct_expr" 73 | require_relative "polars/struct_name_space" 74 | require_relative "polars/testing" 75 | require_relative "polars/utils" 76 | require_relative "polars/utils/constants" 77 | require_relative "polars/utils/convert" 78 | require_relative "polars/utils/parse" 79 | require_relative "polars/utils/various" 80 | require_relative "polars/utils/wrap" 81 | require_relative "polars/version" 82 | require_relative "polars/whenthen" 83 | 84 | module Polars 85 | extend Convert 86 | extend Functions 87 | extend IO 88 | 89 | # @private 90 | F = self 91 | 92 | # @private 93 | N_INFER_DEFAULT = 100 94 | 95 | # @private 96 | class ArrowArrayStream 97 | def arrow_c_stream 98 | self 99 | end 100 | end 101 | 102 | # Return the number of threads in the Polars thread pool. 
module Polars
  # @private
  #
  # Wraps the native `RbBatchedCsv` reader and hands batches back as wrapped
  # data frames, optionally renaming their columns.
  class BatchedCsvReader
    attr_accessor :_reader, :new_columns

    def initialize(
      file,
      has_header: true,
      columns: nil,
      sep: ",",
      comment_char: nil,
      quote_char: '"',
      skip_rows: 0,
      dtypes: nil,
      null_values: nil,
      missing_utf8_is_empty_string: false,
      ignore_errors: false,
      parse_dates: false,
      n_threads: nil,
      infer_schema_length: 100,
      batch_size: 50_000,
      n_rows: nil,
      encoding: "utf8",
      low_memory: false,
      rechunk: true,
      skip_rows_after_header: 0,
      row_count_name: nil,
      row_count_offset: 0,
      eol_char: "\n",
      new_columns: nil,
      raise_if_empty: true,
      truncate_ragged_lines: false,
      decimal_comma: false
    )
      # Stays nil when `file` is not path-like.
      path = Utils.normalize_filepath(file) if Utils.pathlike?(file)

      # A Hash of dtypes becomes [name, dtype] pairs; an Array is passed on as is.
      dtype_list = nil
      dtype_slice = nil
      unless dtypes.nil?
        case dtypes
        when Hash
          dtype_list = dtypes.map { |name, type| [name, Utils.rb_type_to_dtype(type)] }
        when ::Array
          dtype_slice = dtypes
        else
          raise ArgumentError, "dtype arg should be list or dict"
        end
      end

      processed_null_values = Utils._process_null_values(null_values)
      projection, columns = Utils.handle_projection_columns(columns)

      # Positional order is dictated by the native constructor.
      @_reader = RbBatchedCsv.new(
        infer_schema_length,
        batch_size,
        has_header,
        ignore_errors,
        n_rows,
        skip_rows,
        projection,
        sep,
        rechunk,
        columns,
        encoding,
        n_threads,
        path,
        dtype_list,
        dtype_slice,
        low_memory,
        comment_char,
        quote_char,
        processed_null_values,
        missing_utf8_is_empty_string,
        parse_dates,
        skip_rows_after_header,
        Utils.parse_row_index_args(row_count_name, row_count_offset),
        eol_char,
        raise_if_empty,
        truncate_ragged_lines,
        decimal_comma
      )
      @new_columns = new_columns
    end

    # Reads up to `n` batches from the native reader.
    #
    # Returns nil when the reader returns nil; otherwise an array of wrapped
    # data frames, with `new_columns` applied when it is set.
    def next_batches(n)
      batches = _reader.next_batches(n)
      return nil if batches.nil?

      batches.map do |rbdf|
        wrapped = Utils.wrap_df(rbdf)
        new_columns ? Utils._update_columns(wrapped, new_columns) : wrapped
      end
    end
  end
end
14 | # 15 | # @param literal [String] 16 | # The binary substring to look for 17 | # 18 | # @return [Series] 19 | # 20 | # @example 21 | # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]) 22 | # s.bin.contains("\xff".b) 23 | # # => 24 | # # shape: (3,) 25 | # # Series: 'colors' [bool] 26 | # # [ 27 | # # false 28 | # # true 29 | # # true 30 | # # ] 31 | def contains(literal) 32 | super 33 | end 34 | 35 | # Check if string values end with a binary substring. 36 | # 37 | # @param suffix [String] 38 | # Suffix substring. 39 | # 40 | # @return [Series] 41 | # 42 | # @example 43 | # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]) 44 | # s.bin.ends_with("\x00".b) 45 | # # => 46 | # # shape: (3,) 47 | # # Series: 'colors' [bool] 48 | # # [ 49 | # # true 50 | # # true 51 | # # false 52 | # # ] 53 | def ends_with(suffix) 54 | super 55 | end 56 | 57 | # Check if values start with a binary substring. 58 | # 59 | # @param prefix [String] 60 | # Prefix substring. 61 | # 62 | # @return [Series] 63 | # 64 | # @example 65 | # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]) 66 | # s.bin.starts_with("\x00".b) 67 | # # => 68 | # # shape: (3,) 69 | # # Series: 'colors' [bool] 70 | # # [ 71 | # # true 72 | # # false 73 | # # true 74 | # # ] 75 | def starts_with(prefix) 76 | super 77 | end 78 | 79 | # Decode a value using the provided encoding. 80 | # 81 | # @param encoding ["hex", "base64"] 82 | # The encoding to use. 83 | # @param strict [Boolean] 84 | # Raise an error if the underlying value cannot be decoded, 85 | # otherwise mask out with a null value. 86 | # 87 | # @return [Series] 88 | # 89 | # @example Decode values using hexadecimal encoding. 
90 | # s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "0000ff".b]) 91 | # s.bin.decode("hex") 92 | # # => 93 | # # shape: (3,) 94 | # # Series: 'colors' [binary] 95 | # # [ 96 | # # b"\x00\x00\x00" 97 | # # b"\xff\xff\x00" 98 | # # b"\x00\x00\xff" 99 | # # ] 100 | # 101 | # @example Decode values using Base64 encoding. 102 | # s = Polars::Series.new("colors", ["AAAA".b, "//8A".b, "AAD/".b]) 103 | # s.bin.decode("base64") 104 | # # => 105 | # # shape: (3,) 106 | # # Series: 'colors' [binary] 107 | # # [ 108 | # # b"\x00\x00\x00" 109 | # # b"\xff\xff\x00" 110 | # # b"\x00\x00\xff" 111 | # # ] 112 | # 113 | # @example Set `strict=False` to set invalid values to null instead of raising an error. 114 | # s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "invalid_value".b]) 115 | # s.bin.decode("hex", strict: false) 116 | # # => 117 | # # shape: (3,) 118 | # # Series: 'colors' [binary] 119 | # # [ 120 | # # b"\x00\x00\x00" 121 | # # b"\xff\xff\x00" 122 | # # null 123 | # # ] 124 | def decode(encoding, strict: true) 125 | super 126 | end 127 | 128 | # Encode a value using the provided encoding. 129 | # 130 | # @param encoding ["hex", "base64"] 131 | # The encoding to use. 132 | # 133 | # @return [Series] 134 | # 135 | # @example Encode values using hexadecimal encoding. 136 | # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]) 137 | # s.bin.encode("hex") 138 | # # => 139 | # # shape: (3,) 140 | # # Series: 'colors' [str] 141 | # # [ 142 | # # "000000" 143 | # # "ffff00" 144 | # # "0000ff" 145 | # # ] 146 | # 147 | # @example Encode values using Base64 encoding. 
module Polars
  # Namespace for categorical related expressions.
  class CatExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      @_rbexpr = expr._rbexpr
    end

    # Get the categories stored in this data type.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::Series.new(
    #     "cats", ["foo", "bar", "foo", "foo", "ham"], dtype: Polars::Categorical
    #   ).to_frame
    #   df.select(Polars.col("cats").cat.get_categories)
    #   # =>
    #   # shape: (3, 1)
    #   # ┌──────┐
    #   # │ cats │
    #   # │ ---  │
    #   # │ str  │
    #   # ╞══════╡
    #   # │ foo  │
    #   # │ bar  │
    #   # │ ham  │
    #   # └──────┘
    def get_categories
      categories = _rbexpr.cat_get_categories
      Utils.wrap_expr(categories)
    end
  end
end
14 | # 15 | # @return [Series] 16 | # 17 | # @example 18 | # s = Polars::Series.new(["foo", "bar", "foo", "foo", "ham"], dtype: Polars::Categorical) 19 | # s.cat.get_categories 20 | # # => 21 | # # shape: (3,) 22 | # # Series: '' [str] 23 | # # [ 24 | # # "foo" 25 | # # "bar" 26 | # # "ham" 27 | # # ] 28 | def get_categories 29 | super 30 | end 31 | 32 | # Return whether or not the column is a local categorical. 33 | # 34 | # @return [Boolean] 35 | # 36 | # @example Categoricals constructed without a string cache are considered local. 37 | # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical) 38 | # s.cat.is_local 39 | # # => true 40 | # 41 | # @example Categoricals constructed with a string cache are considered global. 42 | # s = nil 43 | # Polars::StringCache.new do 44 | # s = Polars::Series.new(["a", "b", "a"], dtype: Polars::Categorical) 45 | # end 46 | # s.cat.is_local 47 | # # => false 48 | def is_local 49 | _s.cat_is_local 50 | end 51 | 52 | # Convert a categorical column to its local representation. 53 | # 54 | # This may change the underlying physical representation of the column. 55 | # 56 | # @return [Series] 57 | # 58 | # @example Compare the global and local representations of a categorical. 
module Polars
  # Conversion helpers for building data frames from plain Ruby data.
  module Convert
    # Construct a DataFrame from a hash of arrays.
    #
    # This operation clones data, unless you pass in a `Hash`.
    # NOTE(review): `data` is documented as a Hash below, so confirm what the
    # sentence above is meant to say about copying.
    #
    # @param data [Hash]
    #   Two-dimensional data represented as a hash. Hash must contain
    #   arrays.
    # @param schema [Object]
    #   Schema passed on to `DataFrame.hash_to_rbdf`. When both `schema` and
    #   `columns` are given, `schema` is used and `columns` is ignored.
    # @param columns [Array]
    #   Column labels to use for resulting DataFrame. If specified, overrides any
    #   labels already present in the data. Must match data dimensions.
    #   Only used when `schema` is nil.
    #
    # @return [DataFrame]
    #
    # @example
    #   data = {"a" => [1, 2], "b" => [3, 4]}
    #   Polars.from_hash(data)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1   ┆ 3   │
    #   # │ 2   ┆ 4   │
    #   # └─────┴─────┘
    def from_hash(data, schema: nil, columns: nil)
      Utils.wrap_df(
        DataFrame.hash_to_rbdf(
          data,
          # `schema` wins; `columns` is only a fallback.
          schema: schema || columns
        )
      )
    end

    # NOTE: `from_hashes` below is commented out, so this documentation does
    # not describe a callable method.
    #
    # Construct a DataFrame from an array of hashes. This operation clones data.
    #
    # @param hashes [Array]
    #   Array with hashes mapping column name to value.
    # @param infer_schema_length [Integer]
    #   How many hashes/rows to scan to determine the data types.
    #   If set to `nil`, all rows are scanned. This will be slow.
    # @param schema [Object]
    #   Schema that (partially) overwrites the inferred schema.
    #
    # @return [DataFrame]
    #
    # @example
    #   data = [{"a" => 1, "b" => 4}, {"a" => 2, "b" => 5}, {"a" => 3, "b" => 6}]
    #   Polars.from_hashes(data)
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1   ┆ 4   │
    #   # │ 2   ┆ 5   │
    #   # │ 3   ┆ 6   │
    #   # └─────┴─────┘
    #
    # @example Overwrite first column name and dtype
    #   Polars.from_hashes(data, schema: {"c" => :i32})
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬─────┐
    #   # │ c   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i32 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1   ┆ 4   │
    #   # │ 2   ┆ 5   │
    #   # │ 3   ┆ 6   │
    #   # └─────┴─────┘
    #
    # @example Let polars infer the dtypes but inform about a 3rd column
    #   Polars.from_hashes(data, schema: {"a" => :unknown, "b" => :unknown, "c" => :i32})
    #   # shape: (3, 3)
    #   # ┌─────┬─────┬──────┐
    #   # │ a   ┆ b   ┆ c    │
    #   # │ --- ┆ --- ┆ ---  │
    #   # │ i64 ┆ i64 ┆ i32  │
    #   # ╞═════╪═════╪══════╡
    #   # │ 1   ┆ 4   ┆ null │
    #   # │ 2   ┆ 5   ┆ null │
    #   # │ 3   ┆ 6   ┆ null │
    #   # └─────┴─────┴──────┘
    # def from_hashes(hashes, infer_schema_length: 50, schema: nil)
    #   DataFrame._from_hashes(hashes, infer_schema_length: infer_schema_length, schema: schema)
    # end

    # def from_records
    # end
  end
end
Int8, 8 | Int16, 9 | Int32, 10 | Int64 11 | ] 12 | ) 13 | UNSIGNED_INTEGER_DTYPES = DataTypeGroup.new( 14 | [ 15 | UInt8, 16 | UInt16, 17 | UInt32, 18 | UInt64 19 | ] 20 | ) 21 | INTEGER_DTYPES = ( 22 | SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES 23 | ) 24 | FLOAT_DTYPES = DataTypeGroup.new([Float32, Float64]) 25 | NUMERIC_DTYPES = DataTypeGroup.new( 26 | FLOAT_DTYPES + INTEGER_DTYPES | [Decimal] 27 | ) 28 | end 29 | -------------------------------------------------------------------------------- /lib/polars/dynamic_group_by.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | # A dynamic grouper. 3 | # 4 | # This has an `.agg` method which allows you to run all polars expressions in a 5 | # group by context. 6 | class DynamicGroupBy 7 | def initialize( 8 | df, 9 | index_column, 10 | every, 11 | period, 12 | offset, 13 | truncate, 14 | include_boundaries, 15 | closed, 16 | by, 17 | start_by 18 | ) 19 | period = Utils.parse_as_duration_string(period) 20 | offset = Utils.parse_as_duration_string(offset) 21 | every = Utils.parse_as_duration_string(every) 22 | 23 | @df = df 24 | @time_column = index_column 25 | @every = every 26 | @period = period 27 | @offset = offset 28 | @truncate = truncate 29 | @include_boundaries = include_boundaries 30 | @closed = closed 31 | @by = by 32 | @start_by = start_by 33 | end 34 | 35 | def agg(*aggs, **named_aggs) 36 | @df.lazy 37 | .group_by_dynamic( 38 | @time_column, 39 | every: @every, 40 | period: @period, 41 | offset: @offset, 42 | truncate: @truncate, 43 | include_boundaries: @include_boundaries, 44 | closed: @closed, 45 | by: @by, 46 | start_by: @start_by 47 | ) 48 | .agg(*aggs, **named_aggs) 49 | .collect(no_optimization: true, string_cache: false) 50 | end 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /lib/polars/exceptions.rb: -------------------------------------------------------------------------------- 1 | 
module Polars
  # @private
  # Base class for all Polars errors.
  class Error < StandardError; end

  # @private
  # Exception raised when an operation is not allowed (or possible) against a given object or data structure.
  class InvalidOperationError < Error; end

  # @private
  # Exception raised when an unsupported testing assert is made.
  class InvalidAssert < Error; end

  # @private
  # Exception raised when the number of returned rows does not match expectation.
  class RowsException < Error; end

  # @private
  # Exception raised when no rows are returned, but at least one row is expected.
  class NoRowsReturned < RowsException; end

  # @private
  # Exception raised when more rows than expected are returned.
  class TooManyRowsReturned < RowsException; end

  # @private
  # Exception raised when an assertion fails.
  class AssertionError < Error; end

  # @private
  # Exception raised when a computation fails.
  class ComputeError < Error; end

  # @private
  # Exception raised for functionality that is not implemented yet.
  class Todo < Error
    # The default text is supplied through the constructor rather than by
    # overriding `#message`, so that `#to_s` and `#inspect` agree with
    # `#message`, and an explicit message passed to `raise` is not discarded.
    #
    # @param msg [String, nil]
    #   Optional message; defaults to "not implemented yet".
    def initialize(msg = nil)
      super(msg || "not implemented yet")
    end
  end
end
12 | return super unless self.class.method_defined?(method) 13 | 14 | namespace = self.class._accessor 15 | 16 | s = Utils.wrap_s(_s) 17 | expr = F.col(s.name) 18 | expr = expr.send(namespace) if namespace 19 | s.to_frame.select(expr.send(method, ...)).to_series 20 | end 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /lib/polars/functions/col.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Return an expression representing a column in a DataFrame. 4 | # 5 | # @return [Expr] 6 | def col(name, *more_names) 7 | if more_names.any? 8 | if Utils.strlike?(name) 9 | names_str = [name] 10 | names_str.concat(more_names) 11 | return Utils.wrap_expr(Plr.cols(names_str.map(&:to_s))) 12 | elsif Utils.is_polars_dtype(name) 13 | dtypes = [name] 14 | dtypes.concat(more_names) 15 | return Utils.wrap_expr(Plr.dtype_cols(dtypes)) 16 | else 17 | msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}." 18 | raise TypeError, msg 19 | end 20 | end 21 | 22 | if Utils.strlike?(name) 23 | Utils.wrap_expr(Plr.col(name.to_s)) 24 | elsif Utils.is_polars_dtype(name) 25 | Utils.wrap_expr(Plr.dtype_cols([name])) 26 | elsif name.is_a?(::Array) || name.is_a?(::Set) 27 | names = Array(name) 28 | if names.empty? 29 | return Utils.wrap_expr(Plr.cols(names)) 30 | end 31 | 32 | item = names[0] 33 | if Utils.strlike?(item) 34 | Utils.wrap_expr(Plr.cols(names.map(&:to_s))) 35 | elsif Utils.is_polars_dtype(item) 36 | Utils.wrap_expr(Plr.dtype_cols(names)) 37 | else 38 | msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}." 39 | raise TypeError, msg 40 | end 41 | else 42 | msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}." 
43 | raise TypeError, msg 44 | end 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/polars/functions/len.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Return the number of rows in the context. 4 | # 5 | # This is similar to `COUNT(*)` in SQL. 6 | # 7 | # @return [Expr] 8 | # 9 | # @example 10 | # df = Polars::DataFrame.new( 11 | # { 12 | # "a" => [1, 2, nil], 13 | # "b" => [3, nil, nil], 14 | # "c" => ["foo", "bar", "foo"] 15 | # } 16 | # ) 17 | # df.select(Polars.len) 18 | # # => 19 | # # shape: (1, 1) 20 | # # ┌─────┐ 21 | # # │ len │ 22 | # # │ --- │ 23 | # # │ u32 │ 24 | # # ╞═════╡ 25 | # # │ 3 │ 26 | # # └─────┘ 27 | # 28 | # @example Generate an index column by using `len` in conjunction with `int_range`. 29 | # df.select([ 30 | # Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"), 31 | # Polars.all 32 | # ]) 33 | # # => 34 | # # shape: (3, 4) 35 | # # ┌───────┬──────┬──────┬─────┐ 36 | # # │ index ┆ a ┆ b ┆ c │ 37 | # # │ --- ┆ --- ┆ --- ┆ --- │ 38 | # # │ u32 ┆ i64 ┆ i64 ┆ str │ 39 | # # ╞═══════╪══════╪══════╪═════╡ 40 | # # │ 0 ┆ 1 ┆ 3 ┆ foo │ 41 | # # │ 1 ┆ 2 ┆ null ┆ bar │ 42 | # # │ 2 ┆ null ┆ null ┆ foo │ 43 | # # └───────┴──────┴──────┴─────┘ 44 | def len 45 | Utils.wrap_expr(Plr.len) 46 | end 47 | alias_method :length, :len 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /lib/polars/functions/lit.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Return an expression representing a literal value. 
4 | # 5 | # @return [Expr] 6 | def lit(value, dtype: nil, allow_object: nil) 7 | if value.is_a?(::Time) || value.is_a?(::DateTime) 8 | time_unit = dtype&.time_unit || "ns" 9 | time_zone = dtype.&time_zone 10 | e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit)) 11 | if time_zone 12 | return e.dt.replace_time_zone(time_zone.to_s) 13 | else 14 | return e 15 | end 16 | elsif value.is_a?(::Date) 17 | return lit(::Time.utc(value.year, value.month, value.day)).cast(Date) 18 | elsif value.is_a?(Polars::Series) 19 | value = value._s 20 | return Utils.wrap_expr(Plr.lit(value, allow_object, false)) 21 | elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array) 22 | return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true)) 23 | elsif dtype 24 | return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype) 25 | end 26 | 27 | Utils.wrap_expr(Plr.lit(value, allow_object, true)) 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/polars/functions/random.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Set the global random seed for Polars. 4 | # 5 | # This random seed is used to determine things such as shuffle ordering. 6 | # 7 | # @param seed [Integer] 8 | # A non-negative integer < 2**64 used to seed the internal global 9 | # random number generator. 10 | # 11 | # @return [nil] 12 | def set_random_seed(seed) 13 | Plr.set_random_seed(seed) 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /lib/polars/functions/range/int_range.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Create a range expression (or Series). 4 | # 5 | # This can be used in a `select`, `with_column`, etc. 
Be sure that the resulting 6 | # range size is equal to the length of the DataFrame you are collecting. 7 | # 8 | # @param start [Integer, Expr, Series] 9 | # Lower bound of range. 10 | # @param stop [Integer, Expr, Series] 11 | # Upper bound of range. 12 | # @param step [Integer] 13 | # Step size of the range. 14 | # @param eager [Boolean] 15 | # If eager evaluation is `True`, a Series is returned instead of an Expr. 16 | # @param dtype [Symbol] 17 | # Apply an explicit integer dtype to the resulting expression (default is `Int64`). 18 | # 19 | # @return [Expr, Series] 20 | # 21 | # @example 22 | # Polars.arange(0, 3, eager: true) 23 | # # => 24 | # # shape: (3,) 25 | # # Series: 'arange' [i64] 26 | # # [ 27 | # # 0 28 | # # 1 29 | # # 2 30 | # # ] 31 | def int_range(start, stop = nil, step: 1, eager: false, dtype: nil) 32 | if stop.nil? 33 | stop = start 34 | start = 0 35 | end 36 | 37 | start = Utils.parse_into_expression(start) 38 | stop = Utils.parse_into_expression(stop) 39 | dtype ||= Int64 40 | dtype = dtype.to_s if dtype.is_a?(Symbol) 41 | result = Utils.wrap_expr(Plr.int_range(start, stop, step, dtype)).alias("arange") 42 | 43 | if eager 44 | return select(result).to_series 45 | end 46 | 47 | result 48 | end 49 | alias_method :arange, :int_range 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /lib/polars/functions/range/time_range.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Generate a time range. 4 | # 5 | # @param start [Object] 6 | # Lower bound of the time range. 7 | # @param stop [Object] 8 | # Upper bound of the time range. 9 | # @param interval [String] 10 | # Interval of the range periods, specified using the Polars duration string language. 11 | # @param closed ['both', 'left', 'right', 'none'] 12 | # Define which sides of the range are closed (inclusive). 
13 | # @param eager [Boolean] 14 | # Evaluate immediately and return a `Series`. 15 | # If set to `False` (default), return an expression instead. 16 | # 17 | # @return [Object] 18 | # 19 | # @example 20 | # Polars.time_range( 21 | # Time.utc(2000, 1, 1, 14, 0), 22 | # nil, 23 | # "3h15m", 24 | # eager: true 25 | # ).alias("time") 26 | # # => 27 | # # shape: (4,) 28 | # # Series: 'time' [time] 29 | # # [ 30 | # # 14:00:00 31 | # # 17:15:00 32 | # # 20:30:00 33 | # # 23:45:00 34 | # # ] 35 | def time_range( 36 | start = nil, 37 | stop = nil, 38 | interval = "1h", 39 | closed: "both", 40 | eager: false 41 | ) 42 | interval = Utils.parse_interval_argument(interval) 43 | ["y", "mo", "w", "d"].each do |unit| 44 | if interval.include?(unit) 45 | msg = "invalid interval unit for time_range: found #{unit.inspect}" 46 | raise ArgumentError, msg 47 | end 48 | end 49 | 50 | if start.nil? 51 | # date part is ignored 52 | start = ::Time.utc(2000, 1, 1, 0, 0, 0) 53 | end 54 | if stop.nil? 55 | # date part is ignored 56 | stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) 57 | end 58 | 59 | start_rbexpr = Utils.parse_into_expression(start) 60 | end_rbexpr = Utils.parse_into_expression(stop) 61 | 62 | result = Utils.wrap_expr(Plr.time_range(start_rbexpr, end_rbexpr, interval, closed)) 63 | 64 | if eager 65 | return Polars.select(result).to_series 66 | end 67 | 68 | result 69 | end 70 | 71 | # Create a column of time ranges. 72 | # 73 | # @param start [Object] 74 | # Lower bound of the time range. 75 | # @param stop [Object] 76 | # Upper bound of the time range. 77 | # @param interval [Integer] 78 | # Interval of the range periods, specified using the Polars duration string language. 79 | # @param closed ['both', 'left', 'right', 'none'] 80 | # Define which sides of the range are closed (inclusive). 81 | # @param eager [Boolean] 82 | # Evaluate immediately and return a `Series`. 83 | # If set to `false` (default), return an expression instead. 
84 | # 85 | # @return [Object] 86 | # 87 | # @example 88 | # df = Polars::DataFrame.new( 89 | # { 90 | # "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)], 91 | # "end" => Time.utc(2000, 1, 1, 11, 0) 92 | # } 93 | # ) 94 | # df.select(time_range: Polars.time_ranges("start", "end")) 95 | # # => 96 | # # shape: (2, 1) 97 | # # ┌────────────────────────────────┐ 98 | # # │ time_range │ 99 | # # │ --- │ 100 | # # │ list[time] │ 101 | # # ╞════════════════════════════════╡ 102 | # # │ [09:00:00, 10:00:00, 11:00:00] │ 103 | # # │ [10:00:00, 11:00:00] │ 104 | # # └────────────────────────────────┘ 105 | def time_ranges( 106 | start = nil, 107 | stop = nil, 108 | interval = "1h", 109 | closed: "both", 110 | eager: false 111 | ) 112 | interval = Utils.parse_interval_argument(interval) 113 | ["y", "mo", "w", "d"].each do |unit| 114 | if interval.include?(unit) 115 | msg = "invalid interval unit for time_range: found #{unit.inspect}" 116 | raise ArgumentError, msg 117 | end 118 | end 119 | 120 | if start.nil? 121 | # date part is ignored 122 | start = ::Time.utc(2000, 1, 1, 0, 0, 0) 123 | end 124 | if stop.nil? 125 | # date part is ignored 126 | stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) 127 | end 128 | 129 | start_rbexpr = Utils.parse_into_expression(start) 130 | end_rbexpr = Utils.parse_into_expression(stop) 131 | 132 | result = Utils.wrap_expr(Plr.time_ranges(start_rbexpr, end_rbexpr, interval, closed)) 133 | 134 | if eager 135 | return Polars.select(result).to_series 136 | end 137 | 138 | result 139 | end 140 | end 141 | end 142 | -------------------------------------------------------------------------------- /lib/polars/functions/repeat.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Repeat a single value n times. 4 | # 5 | # @param value [Object] 6 | # Value to repeat. 7 | # @param n [Integer] 8 | # Repeat `n` times. 
9 | # @param eager [Boolean] 10 | # Run eagerly and collect into a `Series`. 11 | # @param name [String] 12 | # Only used in `eager` mode. As expression, use `alias`. 13 | # 14 | # @return [Object] 15 | # 16 | # @example Construct a column with a repeated value in a lazy context. 17 | # Polars.select(Polars.repeat("z", 3)).to_series 18 | # # => 19 | # # shape: (3,) 20 | # # Series: 'repeat' [str] 21 | # # [ 22 | # # "z" 23 | # # "z" 24 | # # "z" 25 | # # ] 26 | # 27 | # @example Generate a Series directly by setting `eager: true`. 28 | # Polars.repeat(3, 3, dtype: Polars::Int8, eager: true) 29 | # # => 30 | # # shape: (3,) 31 | # # Series: 'repeat' [i8] 32 | # # [ 33 | # # 3 34 | # # 3 35 | # # 3 36 | # # ] 37 | def repeat(value, n, dtype: nil, eager: false, name: nil) 38 | if !name.nil? 39 | warn "the `name` argument is deprecated. Use the `alias` method instead." 40 | end 41 | 42 | if n.is_a?(Integer) 43 | n = lit(n) 44 | end 45 | 46 | value = Utils.parse_into_expression(value, str_as_lit: true) 47 | expr = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype)) 48 | if !name.nil? 49 | expr = expr.alias(name) 50 | end 51 | if eager 52 | return select(expr).to_series 53 | end 54 | expr 55 | end 56 | 57 | # Construct a column of length `n` filled with ones. 58 | # 59 | # This is syntactic sugar for the `repeat` function. 60 | # 61 | # @param n [Integer] 62 | # Length of the resulting column. 63 | # @param dtype [Object] 64 | # Data type of the resulting column. Defaults to Float64. 65 | # @param eager [Boolean] 66 | # Evaluate immediately and return a `Series`. If set to `false`, 67 | # return an expression instead. 68 | # 69 | # @return [Object] 70 | # 71 | # @example 72 | # Polars.ones(3, dtype: Polars::Int8, eager: true) 73 | # # => 74 | # # shape: (3,) 75 | # # Series: 'ones' [i8] 76 | # # [ 77 | # # 1 78 | # # 1 79 | # # 1 80 | # # ] 81 | def ones(n, dtype: nil, eager: true) 82 | if (zero = _one_or_zero_by_dtype(1, dtype)).nil? 
83 | msg = "invalid dtype for `ones`; found #{dtype}" 84 | raise TypeError, msg 85 | end 86 | 87 | repeat(zero, n, dtype: dtype, eager: eager).alias("ones") 88 | end 89 | 90 | # Construct a column of length `n` filled with zeros. 91 | # 92 | # This is syntactic sugar for the `repeat` function. 93 | # 94 | # @param n [Integer] 95 | # Length of the resulting column. 96 | # @param dtype [Object] 97 | # Data type of the resulting column. Defaults to Float64. 98 | # @param eager [Boolean] 99 | # Evaluate immediately and return a `Series`. If set to `false`, 100 | # return an expression instead. 101 | # 102 | # @return [Object] 103 | # 104 | # @example 105 | # Polars.zeros(3, dtype: Polars::Int8, eager: true) 106 | # # => 107 | # # shape: (3,) 108 | # # Series: 'zeros' [i8] 109 | # # [ 110 | # # 0 111 | # # 0 112 | # # 0 113 | # # ] 114 | def zeros(n, dtype: nil, eager: true) 115 | if (zero = _one_or_zero_by_dtype(0, dtype)).nil? 116 | msg = "invalid dtype for `zeros`; found #{dtype}" 117 | raise TypeError, msg 118 | end 119 | 120 | repeat(zero, n, dtype: dtype, eager: eager).alias("zeros") 121 | end 122 | 123 | private 124 | 125 | def _one_or_zero_by_dtype(value, dtype) 126 | if dtype.integer? 127 | value 128 | elsif dtype.float? 129 | value.to_f 130 | elsif dtype == Boolean 131 | value != 0 132 | elsif dtype == Utf8 133 | value.to_s 134 | elsif dtype == Decimal 135 | Decimal(value.to_s) 136 | elsif [List, Array].include?(dtype) 137 | arr_width = dtype.respond_to?(:width) ? dtype.width : 1 138 | [_one_or_zero_by_dtype(value, dtype.inner)] * arr_width 139 | else 140 | nil 141 | end 142 | end 143 | end 144 | end 145 | -------------------------------------------------------------------------------- /lib/polars/functions/whenthen.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Functions 3 | # Start a "when, then, otherwise" expression. 
4 | # 5 | # @return [When] 6 | # 7 | # @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't. 8 | # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]}) 9 | # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1))) 10 | # # => 11 | # # shape: (3, 3) 12 | # # ┌─────┬─────┬─────────┐ 13 | # # │ foo ┆ bar ┆ literal │ 14 | # # │ --- ┆ --- ┆ --- │ 15 | # # │ i64 ┆ i64 ┆ i32 │ 16 | # # ╞═════╪═════╪═════════╡ 17 | # # │ 1 ┆ 3 ┆ -1 │ 18 | # # │ 3 ┆ 4 ┆ 1 │ 19 | # # │ 4 ┆ 0 ┆ 1 │ 20 | # # └─────┴─────┴─────────┘ 21 | # 22 | # @example Or with multiple when-then operations chained: 23 | # df.with_columns( 24 | # Polars.when(Polars.col("foo") > 2) 25 | # .then(1) 26 | # .when(Polars.col("bar") > 2) 27 | # .then(4) 28 | # .otherwise(-1) 29 | # .alias("val") 30 | # ) 31 | # # => 32 | # # shape: (3, 3) 33 | # # ┌─────┬─────┬─────┐ 34 | # # │ foo ┆ bar ┆ val │ 35 | # # │ --- ┆ --- ┆ --- │ 36 | # # │ i64 ┆ i64 ┆ i32 │ 37 | # # ╞═════╪═════╪═════╡ 38 | # # │ 1 ┆ 3 ┆ 4 │ 39 | # # │ 3 ┆ 4 ┆ 1 │ 40 | # # │ 4 ┆ 0 ┆ 1 │ 41 | # # └─────┴─────┴─────┘ 42 | # 43 | # @example The `otherwise` at the end is optional. 
If left out, any rows where none of the `when` expressions evaluate to True, are set to `null`: 44 | # df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val")) 45 | # # => 46 | # # shape: (3, 3) 47 | # # ┌─────┬─────┬──────┐ 48 | # # │ foo ┆ bar ┆ val │ 49 | # # │ --- ┆ --- ┆ --- │ 50 | # # │ i64 ┆ i64 ┆ i32 │ 51 | # # ╞═════╪═════╪══════╡ 52 | # # │ 1 ┆ 3 ┆ null │ 53 | # # │ 3 ┆ 4 ┆ 1 │ 54 | # # │ 4 ┆ 0 ┆ 1 │ 55 | # # └─────┴─────┴──────┘ 56 | # 57 | # @example Pass multiple predicates, each of which must be met: 58 | # df.with_columns( 59 | # val: Polars.when( 60 | # Polars.col("bar") > 0, 61 | # Polars.col("foo") % 2 != 0 62 | # ) 63 | # .then(99) 64 | # .otherwise(-1) 65 | # ) 66 | # # => 67 | # # shape: (3, 3) 68 | # # ┌─────┬─────┬─────┐ 69 | # # │ foo ┆ bar ┆ val │ 70 | # # │ --- ┆ --- ┆ --- │ 71 | # # │ i64 ┆ i64 ┆ i32 │ 72 | # # ╞═════╪═════╪═════╡ 73 | # # │ 1 ┆ 3 ┆ 99 │ 74 | # # │ 3 ┆ 4 ┆ 99 │ 75 | # # │ 4 ┆ 0 ┆ -1 │ 76 | # # └─────┴─────┴─────┘ 77 | # 78 | # @example Pass conditions as keyword arguments: 79 | # df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1)) 80 | # # => 81 | # # shape: (3, 3) 82 | # # ┌─────┬─────┬─────┐ 83 | # # │ foo ┆ bar ┆ val │ 84 | # # │ --- ┆ --- ┆ --- │ 85 | # # │ i64 ┆ i64 ┆ i32 │ 86 | # # ╞═════╪═════╪═════╡ 87 | # # │ 1 ┆ 3 ┆ -1 │ 88 | # # │ 3 ┆ 4 ┆ -1 │ 89 | # # │ 4 ┆ 0 ┆ 99 │ 90 | # # └─────┴─────┴─────┘ 91 | def when(*predicates, **constraints) 92 | condition = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints) 93 | When.new(Plr.when(condition)) 94 | end 95 | end 96 | end 97 | -------------------------------------------------------------------------------- /lib/polars/io/avro.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module IO 3 | # Read into a DataFrame from Apache Avro format. 4 | # 5 | # @param source [Object] 6 | # Path to a file or a file-like object. 
7 | # @param columns [Object] 8 | # Columns to select. Accepts a list of column indices (starting at zero) or a list 9 | # of column names. 10 | # @param n_rows [Integer] 11 | # Stop reading from Apache Avro file after reading ``n_rows``. 12 | # 13 | # @return [DataFrame] 14 | def read_avro(source, columns: nil, n_rows: nil) 15 | if Utils.pathlike?(source) 16 | source = Utils.normalize_filepath(source) 17 | end 18 | projection, column_names = Utils.handle_projection_columns(columns) 19 | 20 | rbdf = RbDataFrame.read_avro(source, column_names, projection, n_rows) 21 | Utils.wrap_df(rbdf) 22 | end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/polars/io/database.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module IO 3 | # Read a SQL query into a DataFrame. 4 | # 5 | # @param query [Object] 6 | # ActiveRecord::Relation or ActiveRecord::Result. 7 | # @param schema_overrides [Hash] 8 | # A hash mapping column names to dtypes, used to override the schema 9 | # inferred from the query. 
10 | # 11 | # @return [DataFrame] 12 | def read_database(query, schema_overrides: nil) 13 | if !defined?(ActiveRecord) 14 | raise Error, "Active Record not available" 15 | end 16 | 17 | result = 18 | if query.is_a?(ActiveRecord::Result) 19 | query 20 | elsif query.is_a?(ActiveRecord::Relation) 21 | query.connection_pool.with_connection { |c| c.select_all(query.to_sql) } 22 | elsif query.is_a?(::String) 23 | ActiveRecord::Base.connection_pool.with_connection { |c| c.select_all(query) } 24 | else 25 | raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String" 26 | end 27 | 28 | data = {} 29 | schema_overrides = (schema_overrides || {}).transform_keys(&:to_s) 30 | 31 | result.columns.each_with_index do |k, i| 32 | column_type = result.column_types[i] 33 | 34 | data[k] = 35 | if column_type 36 | result.rows.map { |r| column_type.deserialize(r[i]) } 37 | else 38 | result.rows.map { |r| r[i] } 39 | end 40 | 41 | polars_type = 42 | case column_type&.type 43 | when :binary 44 | Binary 45 | when :boolean 46 | Boolean 47 | when :date 48 | Date 49 | when :datetime, :timestamp 50 | Datetime 51 | when :decimal 52 | Decimal 53 | when :float 54 | # TODO uncomment in future release 55 | # if column_type.limit && column_type.limit <= 24 56 | # Float32 57 | # else 58 | # Float64 59 | # end 60 | Float64 61 | when :integer 62 | # TODO uncomment in future release 63 | # case column_type.limit 64 | # when 1 65 | # Int8 66 | # when 2 67 | # Int16 68 | # when 4 69 | # Int32 70 | # else 71 | # Int64 72 | # end 73 | Int64 74 | when :string, :text 75 | String 76 | when :time 77 | Time 78 | # TODO fix issue with null 79 | # when :json, :jsonb 80 | # Struct 81 | end 82 | 83 | schema_overrides[k] ||= polars_type if polars_type 84 | end 85 | 86 | DataFrame.new(data, schema_overrides: schema_overrides) 87 | end 88 | alias_method :read_sql, :read_database 89 | end 90 | end 91 | -------------------------------------------------------------------------------- 
/lib/polars/io/delta.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module IO 3 | # Reads into a DataFrame from a Delta lake table. 4 | # 5 | # @param source [Object] 6 | # DeltaTable or a Path or URI to the root of the Delta lake table. 7 | # @param version [Object] 8 | # Numerical version or timestamp version of the Delta lake table. 9 | # @param columns [Array] 10 | # Columns to select. Accepts a list of column names. 11 | # @param rechunk [Boolean] 12 | # Make sure that all columns are contiguous in memory by 13 | # aggregating the chunks into a single array. 14 | # @param storage_options [Hash] 15 | # Extra options for the storage backends supported by `deltalake-rb`. 16 | # @param delta_table_options [Hash] 17 | # Additional keyword arguments while reading a Delta lake Table. 18 | # 19 | # @return [DataFrame] 20 | def read_delta( 21 | source, 22 | version: nil, 23 | columns: nil, 24 | rechunk: false, 25 | storage_options: nil, 26 | delta_table_options: nil 27 | ) 28 | dl_tbl = 29 | _get_delta_lake_table( 30 | source, 31 | version: version, 32 | storage_options: storage_options, 33 | delta_table_options: delta_table_options 34 | ) 35 | 36 | dl_tbl.to_polars(columns: columns, rechunk: rechunk) 37 | end 38 | 39 | # Lazily read from a Delta lake table. 40 | # 41 | # @param source [Object] 42 | # DeltaTable or a Path or URI to the root of the Delta lake table. 43 | # @param version [Object] 44 | # Numerical version or timestamp version of the Delta lake table. 45 | # @param storage_options [Hash] 46 | # Extra options for the storage backends supported by `deltalake-rb`. 47 | # @param delta_table_options [Hash] 48 | # Additional keyword arguments while reading a Delta lake Table. 
49 | # 50 | # @return [LazyFrame] 51 | def scan_delta( 52 | source, 53 | version: nil, 54 | storage_options: nil, 55 | delta_table_options: nil 56 | ) 57 | dl_tbl = 58 | _get_delta_lake_table( 59 | source, 60 | version: version, 61 | storage_options: storage_options, 62 | delta_table_options: delta_table_options 63 | ) 64 | 65 | dl_tbl.to_polars(eager: false) 66 | end 67 | 68 | private 69 | 70 | def _resolve_delta_lake_uri(table_uri, strict: true) 71 | require "uri" 72 | 73 | parsed_result = URI(table_uri) 74 | 75 | resolved_uri = 76 | if parsed_result.scheme == "" 77 | Utils.normalize_filepath(table_uri) 78 | else 79 | table_uri 80 | end 81 | 82 | resolved_uri 83 | end 84 | 85 | def _get_delta_lake_table( 86 | table_path, 87 | version: nil, 88 | storage_options: nil, 89 | delta_table_options: nil 90 | ) 91 | _check_if_delta_available 92 | 93 | if table_path.is_a?(DeltaLake::Table) 94 | return table_path 95 | end 96 | delta_table_options ||= {} 97 | resolved_uri = _resolve_delta_lake_uri(table_path) 98 | if !version.is_a?(::String) && !version.is_a?(::Time) 99 | dl_tbl = 100 | DeltaLake::Table.new( 101 | resolved_uri, 102 | version: version, 103 | storage_options: storage_options, 104 | **delta_table_options 105 | ) 106 | else 107 | dl_tbl = 108 | DeltaLake::Table.new( 109 | resolved_uri, 110 | storage_options: storage_options, 111 | **delta_table_options 112 | ) 113 | dl_tbl.load_as_version(version) 114 | end 115 | 116 | dl_tbl = DeltaLake::Table.new(table_path) 117 | dl_tbl 118 | end 119 | 120 | def _check_if_delta_available 121 | if !defined?(DeltaLake) 122 | raise Error, "Delta Lake not available" 123 | end 124 | end 125 | end 126 | end 127 | -------------------------------------------------------------------------------- /lib/polars/io/json.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module IO 3 | # Read into a DataFrame from a JSON file. 
4 | # 5 | # @param source [Object] 6 | # Path to a file or a file-like object. 7 | # 8 | # @return [DataFrame] 9 | def read_json( 10 | source, 11 | schema: nil, 12 | schema_overrides: nil, 13 | infer_schema_length: N_INFER_DEFAULT 14 | ) 15 | if Utils.pathlike?(source) 16 | source = Utils.normalize_filepath(source) 17 | end 18 | 19 | rbdf = 20 | RbDataFrame.read_json( 21 | source, 22 | infer_schema_length, 23 | schema, 24 | schema_overrides 25 | ) 26 | Utils.wrap_df(rbdf) 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/polars/io/ndjson.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module IO 3 | # Read into a DataFrame from a newline delimited JSON file. 4 | # 5 | # @param source [Object] 6 | # Path to a file or a file-like object. 7 | # 8 | # @return [DataFrame] 9 | def read_ndjson( 10 | source, 11 | schema: nil, 12 | schema_overrides: nil, 13 | ignore_errors: false 14 | ) 15 | if Utils.pathlike?(source) 16 | source = Utils.normalize_filepath(source) 17 | end 18 | 19 | rbdf = 20 | RbDataFrame.read_ndjson( 21 | source, 22 | ignore_errors, 23 | schema, 24 | schema_overrides 25 | ) 26 | Utils.wrap_df(rbdf) 27 | end 28 | 29 | # Lazily read from a newline delimited JSON file. 30 | # 31 | # This allows the query optimizer to push down predicates and projections to the scan 32 | # level, thereby potentially reducing memory overhead. 33 | # 34 | # @param source [String] 35 | # Path to a file. 36 | # @param infer_schema_length [Integer] 37 | # Infer the schema length from the first `infer_schema_length` rows. 38 | # @param batch_size [Integer] 39 | # Number of rows to read in each batch. 40 | # @param n_rows [Integer] 41 | # Stop reading from JSON file after reading `n_rows`. 42 | # @param low_memory [Boolean] 43 | # Reduce memory pressure at the expense of performance. 
# @param rechunk [Boolean]
    #   Reallocate to contiguous memory when all chunks/files are parsed.
    # @param row_count_name [String]
    #   If not nil, this will insert a row count column with the given name into the
    #   DataFrame.
    # @param row_count_offset [Integer]
    #   Offset to start the row_count column (only use if the name is set).
    #
    # @return [LazyFrame]
    def scan_ndjson(
      source,
      infer_schema_length: N_INFER_DEFAULT,
      batch_size: 1024,
      n_rows: nil,
      low_memory: false,
      rechunk: true,
      row_count_name: nil,
      row_count_offset: 0
    )
      # Exactly one of source / sources is handed to the native reader:
      # a single path-like goes in source, an Array goes in sources.
      sources = []
      if Utils.pathlike?(source)
        source = Utils.normalize_filepath(source)
      elsif source.is_a?(::Array)
        if Utils.is_path_or_str_sequence(source)
          sources = source.map { |s| Utils.normalize_filepath(s) }
        else
          sources = source
        end

        source = nil
      end

      rblf =
        RbLazyFrame.new_from_ndjson(
          source,
          sources,
          infer_schema_length,
          batch_size,
          n_rows,
          low_memory,
          rechunk,
          Utils.parse_row_index_args(row_count_name, row_count_offset)
        )
      Utils.wrap_ldf(rblf)
    end
  end
end

--------------------------------------------------------------------------------
/lib/polars/plot.rb:
--------------------------------------------------------------------------------
module Polars
  module Plot
    # Plot data.
    #
    # @param x [Object]
    #   Column for the x axis; defaults to the first column.
    # @param y [Object]
    #   Column for the y axis; defaults to the second column.
    # @param type [String]
    #   One of "line", "area", "pie", "column", "bar" or "scatter". When nil it
    #   is derived from the dtypes of the x and y columns.
    # @param group [Object]
    #   Optional column used for the color encoding.
    # @param stacked [Boolean]
    #   For "column" and "bar" charts with a group: when falsy the bars are
    #   offset by group.
    #
    # @return [Vega::LiteChart]
    def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
      require "vega"

      # x and y may only be omitted when the frame has exactly two columns
      raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
      x ||= columns[0]
      y ||= columns[1]
      # derive the chart type from the column dtypes when none was given
      type ||= begin
        if self[x].numeric? && self[y].numeric?
          "scatter"
        elsif self[x].utf8? && self[y].numeric?
          "column"
        elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
          "line"
        else
          raise "Cannot determine type. Use the type option."
        end
      end
      # keep only the columns that are plotted (deduplicated, as strings)
      df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
      data = df.rows(named: true)

      case type
      when "line", "area"
        x_type =
          if df[x].numeric?
            "quantitative"
          elsif df[x].datelike?
            "temporal"
          else
            "nominal"
          end

        scale = x_type == "temporal" ? {type: "utc"} : {}
        encoding = {
          x: {field: x, type: x_type, scale: scale},
          y: {field: y, type: "quantitative"}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "pie"
        raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?

        Vega.lite
          .data(data)
          .mark(type: "arc", tooltip: true)
          .encoding(
            color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
            theta: {field: y, type: "quantitative"}
          )
          .view(stroke: nil)
      when "column"
        encoding = {
          x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          y: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:xOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "bar"
        # same as "column" with the axes swapped
        encoding = {
          # TODO determine label angle
          y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          x: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:yOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "scatter"
        encoding = {
          x: {field: x, type: "quantitative", scale: {zero: false}},
          y: {field: y, type: "quantitative", scale: {zero: false}},
          size: {value: 60}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: "circle", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      else
        raise ArgumentError, "Invalid type: #{type}"
      end
    end
  end
end

--------------------------------------------------------------------------------
/lib/polars/rolling_group_by.rb:
--------------------------------------------------------------------------------
module Polars
  # A rolling grouper.
  #
  # This has an `.agg` method which will allow you to run all polars expressions in a
  # group by context.
  class RollingGroupBy
    # Stores the frame and the rolling window settings. `period` and `offset`
    # are first passed through `Utils.parse_as_duration_string`.
    def initialize(
      df,
      index_column,
      period,
      offset,
      closed,
      group_by
    )
      period = Utils.parse_as_duration_string(period)
      offset = Utils.parse_as_duration_string(offset)

      @df = df
      @time_column = index_column
      @period = period
      @offset = offset
      @closed = closed
      @group_by = group_by
    end

    # Run the aggregations over the rolling groups.
    #
    # The work is done on the lazy frame and collected immediately with
    # `no_optimization: true` and `string_cache: false`.
    def agg(*aggs, **named_aggs)
      @df.lazy
        .group_by_rolling(
          index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
        )
        .agg(*aggs, **named_aggs)
        .collect(no_optimization: true, string_cache: false)
    end
  end
end

--------------------------------------------------------------------------------
/lib/polars/schema.rb:
--------------------------------------------------------------------------------
module Polars
  # Wrapper around a Hash mapping column names to dtypes.
  class Schema
    # @param schema [Object]
    #   Anything responding to `to_h`.
    # @param check_dtypes [Boolean]
    #   NOTE(review): dtype checking is not implemented; any truthy value
    #   (including the default) raises `Todo`.
    def initialize(schema, check_dtypes: true)
      raise Todo if check_dtypes
      @schema = schema.to_h
    end

    # Look up the entry stored under `key`.
    def [](key)
      @schema[key]
    end

    # Keys of the underlying Hash.
    def names
      @schema.keys
    end

    # Values of the underlying Hash.
    def dtypes
      @schema.values
    end
# Polars::Schema, continued.

    # Number of entries in the underlying Hash.
    def length
      @schema.length
    end

    # Class name followed by the underlying Hash in parentheses.
    def to_s
      "#{self.class.name}(#{@schema})"
    end
    alias_method :inspect, :to_s
  end
end

--------------------------------------------------------------------------------
/lib/polars/slice.rb:
--------------------------------------------------------------------------------
module Polars
  # @private
  class Slice
    # @param obj [Object]
    #   The object to slice; the code below distinguishes DataFrame from
    #   anything else (which must respond to `to_frame`).
    def initialize(obj)
      @obj = obj
    end

    # Apply a slice operation, taking advantage of any potential fast paths.
    #
    # @param s [Range]
    #   Read through `begin`, `end` and `exclude_end?`.
    def apply(s)
      # normalize slice
      _slice_setup(s)

      # check for fast-paths / single-operation calls
      if @slice_length == 0
        @obj.cleared
      elsif @is_unbounded && [-1, 1].include?(@stride)
        @stride < 0 ? @obj.reverse : @obj.clone
      elsif @start >= 0 && @stop >= 0 && @stride == 1
        @obj.slice(@start, @slice_length)
      elsif @stride < 0 && @slice_length == 1
        @obj.slice(@stop + 1, 1)
      else
        # multi-operation calls; make lazy
        lazyobj = _lazify(@obj)
        sliced = @stride > 0 ? _slice_positive(lazyobj) : _slice_negative(lazyobj)
        _as_original(sliced, @obj)
      end
    end

    private

    # Return lazy variant back to its original type.
    def _as_original(lazy, original)
      frame = lazy.collect
      original.is_a?(DataFrame) ? frame : frame.to_series
    end

    # Make lazy to ensure efficient/consistent handling.
    def _lazify(obj)
      obj.is_a?(DataFrame) ? obj.lazy : obj.to_frame.lazy
    end

    # Logic for slices with positive stride.
    def _slice_positive(obj)
      # note: at this point stride is guaranteed to be > 1
      obj.slice(@start, @slice_length).take_every(@stride)
    end

    # Logic for slices with negative stride.
    def _slice_negative(obj)
      stride = @stride.abs
      lazyslice = obj.slice(@stop + 1, @slice_length).reverse
      stride > 1 ? lazyslice.take_every(stride) : lazyslice
    end

    # Normalize slice bounds, identify unbounded and/or zero-length slices.
    #
    # Sets @start, @stop, @stride, @is_unbounded and @slice_length.
    def _slice_setup(s)
      # can normalize slice indices as we know object size
      obj_len = @obj.length
      # a negative begin counts from the end and is clamped at 0
      start = if s.begin
        if s.begin < 0
          [s.begin + obj_len, 0].max
        else
          s.begin
        end
      else
        0
      end
      # stop is exclusive: an inclusive range end is shifted by one
      stop = if s.end
        if s.end < 0
          s.end + (s.exclude_end? ? 0 : 1) + obj_len
        else
          s.end + (s.exclude_end? ? 0 : 1)
        end
      else
        obj_len
      end
      # NOTE(review): the stride is hard-coded to 1 here, so the
      # negative-stride branches in this class are never taken from this
      # method -- confirm whether stepped ranges are meant to be supported.
      stride = 1

      # check if slice is actually unbounded
      if stride >= 1
        @is_unbounded = start <= 0 && stop >= obj_len
      else
        @is_unbounded = stop == -1 && start >= obj_len - 1
      end

      # determine slice length
      if @obj.is_empty
        @slice_length = 0
      elsif @is_unbounded
        @slice_length = obj_len
      else
        @slice_length = if start == stop || (stride > 0 && start > stop) || (stride < 0 && start < stop)
          0
        else
          (stop - start).abs
        end
      end
      @start = start
      @stop = stop
      @stride = stride
    end
  end
end

--------------------------------------------------------------------------------
/lib/polars/string_cache.rb:
--------------------------------------------------------------------------------
module Polars
  # Class for enabling and disabling the global string cache.
  #
  # @example Construct two Series using the same global string cache.
  #   s1 = nil
  #   s2 = nil
  #   Polars::StringCache.new do
  #     s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
  #     s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
  #   end
  #
  # @example As both Series are constructed under the same global string cache, they can be concatenated.
#   Polars.concat([s1, s2])
  #   # =>
  #   # shape: (6,)
  #   # Series: 'color' [cat]
  #   # [
  #   #         "red"
  #   #         "green"
  #   #         "red"
  #   #         "blue"
  #   #         "red"
  #   #         "green"
  #   # ]
  class StringCache
    # Hands the given block to `RbStringCacheHolder.hold`.
    def initialize(&block)
      RbStringCacheHolder.hold(&block)
    end
  end

  # Shorthand for `StringCache.new`; all arguments and the block are forwarded.
  def self.string_cache(...)
    StringCache.new(...)
  end

  module Functions
    # Enable the global string cache.
    #
    # `Categorical` columns created under the same global string cache have
    # the same underlying physical value when string values are equal. This allows the
    # columns to be concatenated or used in a join operation, for example.
    #
    # @return [nil]
    #
    # @example Construct two Series using the same global string cache.
    #   Polars.enable_string_cache
    #   s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
    #   s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
    #   Polars.disable_string_cache
    #
    # @example As both Series are constructed under the same global string cache, they can be concatenated.
    #   Polars.concat([s1, s2])
    #   # =>
    #   # shape: (6,)
    #   # Series: 'color' [cat]
    #   # [
    #   #         "red"
    #   #         "green"
    #   #         "red"
    #   #         "blue"
    #   #         "red"
    #   #         "green"
    #   # ]
    def enable_string_cache
      Plr.enable_string_cache
    end

    # Disable and clear the global string cache.
    #
    # @return [nil]
    #
    # @example Construct two Series using the same global string cache.
    #   Polars.enable_string_cache
    #   s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
    #   s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
    #   Polars.disable_string_cache
    #
    # @example As both Series are constructed under the same global string cache, they can be concatenated.
78 | # Polars.concat([s1, s2]) 79 | # # => 80 | # # shape: (6,) 81 | # # Series: 'color' [cat] 82 | # # [ 83 | # # "red" 84 | # # "green" 85 | # # "red" 86 | # # "blue" 87 | # # "red" 88 | # # "green" 89 | # # ] 90 | def disable_string_cache 91 | Plr.disable_string_cache 92 | end 93 | 94 | # Check whether the global string cache is enabled. 95 | # 96 | # @return [Boolean] 97 | def using_string_cache 98 | Plr.using_string_cache 99 | end 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /lib/polars/struct_expr.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | # Namespace for struct related expressions. 3 | class StructExpr 4 | # @private 5 | attr_accessor :_rbexpr 6 | 7 | # @private 8 | def initialize(expr) 9 | self._rbexpr = expr._rbexpr 10 | end 11 | 12 | # Retrieve one of the fields of this `Struct` as a new Series. 13 | # 14 | # @return [Expr] 15 | def [](item) 16 | if item.is_a?(::String) 17 | field(item) 18 | elsif item.is_a?(Integer) 19 | Utils.wrap_expr(_rbexpr.struct_field_by_index(item)) 20 | else 21 | raise ArgumentError, "expected type Integer or String, got #{item.class.name}" 22 | end 23 | end 24 | 25 | # Retrieve one of the fields of this `Struct` as a new Series. 
26 | # 27 | # @param name [String] 28 | # Name of the field 29 | # 30 | # @return [Expr] 31 | # 32 | # @example 33 | # df = ( 34 | # Polars::DataFrame.new( 35 | # { 36 | # "int" => [1, 2], 37 | # "str" => ["a", "b"], 38 | # "bool" => [true, nil], 39 | # "list" => [[1, 2], [3]] 40 | # } 41 | # ) 42 | # .to_struct("my_struct") 43 | # .to_frame 44 | # ) 45 | # df.select(Polars.col("my_struct").struct.field("str")) 46 | # # => 47 | # # shape: (2, 1) 48 | # # ┌─────┐ 49 | # # │ str │ 50 | # # │ --- │ 51 | # # │ str │ 52 | # # ╞═════╡ 53 | # # │ a │ 54 | # # │ b │ 55 | # # └─────┘ 56 | def field(name) 57 | Utils.wrap_expr(_rbexpr.struct_field_by_name(name)) 58 | end 59 | 60 | # Rename the fields of the struct. 61 | # 62 | # @param names [Array] 63 | # New names in the order of the struct's fields 64 | # 65 | # @return [Expr] 66 | # 67 | # @example 68 | # df = ( 69 | # Polars::DataFrame.new( 70 | # { 71 | # "int" => [1, 2], 72 | # "str" => ["a", "b"], 73 | # "bool" => [true, nil], 74 | # "list" => [[1, 2], [3]] 75 | # } 76 | # ) 77 | # .to_struct("my_struct") 78 | # .to_frame 79 | # ) 80 | # df = df.with_column( 81 | # Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"]) 82 | # ) 83 | # df.select(Polars.col("my_struct").struct.field("INT")) 84 | # # => 85 | # # shape: (2, 1) 86 | # # ┌─────┐ 87 | # # │ INT │ 88 | # # │ --- │ 89 | # # │ i64 │ 90 | # # ╞═════╡ 91 | # # │ 1 │ 92 | # # │ 2 │ 93 | # # └─────┘ 94 | def rename_fields(names) 95 | Utils.wrap_expr(_rbexpr.struct_rename_fields(names)) 96 | end 97 | end 98 | end 99 | -------------------------------------------------------------------------------- /lib/polars/struct_name_space.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | # Series.struct namespace. 
3 | class StructNameSpace 4 | include ExprDispatch 5 | 6 | self._accessor = "struct" 7 | 8 | # @private 9 | def initialize(series) 10 | self._s = series._s 11 | end 12 | 13 | # Retrieve one of the fields of this `Struct` as a new Series. 14 | # 15 | # @return [Series] 16 | def [](item) 17 | if item.is_a?(Integer) 18 | field(fields[item]) 19 | elsif item.is_a?(::String) 20 | field(item) 21 | else 22 | raise ArgumentError, "expected type Integer or String, got #{item.class.name}" 23 | end 24 | end 25 | 26 | # Get the names of the fields. 27 | # 28 | # @return [Array] 29 | # 30 | # @example 31 | # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}]) 32 | # s.struct.fields 33 | # # => ["a", "b"] 34 | def fields 35 | if _s.nil? 36 | [] 37 | else 38 | _s.struct_fields 39 | end 40 | end 41 | 42 | # Retrieve one of the fields of this `Struct` as a new Series. 43 | # 44 | # @param name [String] 45 | # Name of the field 46 | # 47 | # @return [Series] 48 | # 49 | # @example 50 | # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}]) 51 | # s.struct.field("a") 52 | # # => 53 | # # shape: (2,) 54 | # # Series: 'a' [i64] 55 | # # [ 56 | # # 1 57 | # # 3 58 | # # ] 59 | def field(name) 60 | super 61 | end 62 | 63 | # Rename the fields of the struct. 64 | # 65 | # @param names [Array] 66 | # New names in the order of the struct's fields 67 | # 68 | # @return [Series] 69 | # 70 | # @example 71 | # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}]) 72 | # s.struct.fields 73 | # # => ["a", "b"] 74 | # 75 | # @example 76 | # s = s.struct.rename_fields(["c", "d"]) 77 | # s.struct.fields 78 | # # => ["c", "d"] 79 | def rename_fields(names) 80 | super 81 | end 82 | 83 | # Get the struct definition as a name/dtype schema dict. 
84 | # 85 | # @return [Object] 86 | # 87 | # @example 88 | # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}]) 89 | # s.struct.schema 90 | # # => {"a"=>Polars::Int64, "b"=>Polars::Int64} 91 | def schema 92 | if _s.nil? 93 | {} 94 | else 95 | _s.dtype.to_schema 96 | end 97 | end 98 | 99 | # Convert this struct Series to a DataFrame with a separate column for each field. 100 | # 101 | # @return [DataFrame] 102 | # 103 | # @example 104 | # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}]) 105 | # s.struct.unnest 106 | # # => 107 | # # shape: (2, 2) 108 | # # ┌─────┬─────┐ 109 | # # │ a ┆ b │ 110 | # # │ --- ┆ --- │ 111 | # # │ i64 ┆ i64 │ 112 | # # ╞═════╪═════╡ 113 | # # │ 1 ┆ 2 │ 114 | # # │ 3 ┆ 4 │ 115 | # # └─────┴─────┘ 116 | def unnest 117 | Utils.wrap_df(_s.struct_unnest) 118 | end 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /lib/polars/utils/constants.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Utils 3 | SECONDS_PER_DAY = 86_400 4 | SECONDS_PER_HOUR = 3_600 5 | NS_PER_SECOND = 1_000_000_000 6 | US_PER_SECOND = 1_000_000 7 | MS_PER_SECOND = 1_000 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /lib/polars/utils/convert.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Utils 3 | def self.parse_as_duration_string(td) 4 | if td.nil? || td.is_a?(::String) 5 | return td 6 | end 7 | _timedelta_to_duration_string(td) 8 | end 9 | 10 | def self._timedelta_to_pl_duration(td) 11 | td 12 | end 13 | 14 | def self.negate_duration_string(duration) 15 | if duration.start_with?("-") 16 | duration[1..] 
17 | else 18 | "-#{duration}" 19 | end 20 | end 21 | 22 | def self.date_to_int(d) 23 | dt = d.to_datetime.to_time 24 | dt.to_i / SECONDS_PER_DAY 25 | end 26 | 27 | def self.datetime_to_int(dt, time_unit) 28 | dt = dt.to_datetime.to_time 29 | if time_unit == "ns" 30 | nanos = dt.nsec 31 | dt.to_i * NS_PER_SECOND + nanos 32 | elsif time_unit == "us" 33 | micros = dt.usec 34 | dt.to_i * US_PER_SECOND + micros 35 | elsif time_unit == "ms" 36 | millis = dt.usec / 1000 37 | dt.to_i * MS_PER_SECOND + millis 38 | elsif time_unit.nil? 39 | # Ruby has ns precision 40 | nanos = dt.nsec 41 | dt.to_i * NS_PER_SECOND + nanos 42 | else 43 | raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{tu}" 44 | end 45 | end 46 | 47 | def self._to_ruby_date(value) 48 | # days to seconds 49 | # important to create from utc. Not doing this leads 50 | # to inconsistencies dependent on the timezone you are in. 51 | ::Time.at(value * 86400).utc.to_date 52 | end 53 | 54 | def self._to_ruby_time(value) 55 | if value == 0 56 | ::Time.utc(2000, 1, 1) 57 | else 58 | seconds, nanoseconds = value.divmod(1_000_000_000) 59 | minutes, seconds = seconds.divmod(60) 60 | hours, minutes = minutes.divmod(60) 61 | ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0) 62 | end 63 | end 64 | 65 | def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil) 66 | if time_zone.nil? 
|| time_zone == "" || time_zone == "UTC" 67 | if time_unit == "ns" 68 | ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc 69 | elsif time_unit == "us" 70 | ::Time.at(value / 1000000, value % 1000000, :usec).utc 71 | elsif time_unit == "ms" 72 | ::Time.at(value / 1000, value % 1000, :millisecond).utc 73 | else 74 | raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}" 75 | end 76 | else 77 | raise Todo 78 | end 79 | end 80 | 81 | def self._to_ruby_duration(value, time_unit = "ns") 82 | if time_unit == "ns" 83 | value / 1e9 84 | elsif time_unit == "us" 85 | value / 1e6 86 | elsif time_unit == "ms" 87 | value / 1e3 88 | else 89 | raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}" 90 | end 91 | end 92 | 93 | def self._to_ruby_decimal(digits, scale) 94 | BigDecimal("#{digits}e#{scale}") 95 | end 96 | end 97 | end 98 | -------------------------------------------------------------------------------- /lib/polars/utils/parse.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Utils 3 | def self.parse_into_expression( 4 | input, 5 | str_as_lit: false, 6 | list_as_series: false, 7 | structify: false, 8 | dtype: nil 9 | ) 10 | if input.is_a?(Expr) 11 | expr = input 12 | if structify 13 | expr = _structify_expression(expr) 14 | end 15 | elsif (input.is_a?(::String) || input.is_a?(Symbol)) && !str_as_lit 16 | expr = F.col(input) 17 | elsif input.is_a?(::Array) && list_as_series 18 | expr = F.lit(Series.new(input), dtype: dtype) 19 | else 20 | expr = F.lit(input, dtype: dtype) 21 | end 22 | 23 | expr._rbexpr 24 | end 25 | 26 | def self.parse_into_list_of_expressions(*inputs, __structify: false, **named_inputs) 27 | exprs = _parse_positional_inputs(inputs, structify: __structify) 28 | if named_inputs.any? 
29 | named_exprs = _parse_named_inputs(named_inputs, structify: __structify) 30 | exprs.concat(named_exprs) 31 | end 32 | 33 | exprs 34 | end 35 | 36 | def self._parse_positional_inputs(inputs, structify: false) 37 | inputs_iter = _parse_inputs_as_iterable(inputs) 38 | inputs_iter.map { |e| parse_into_expression(e, structify: structify) } 39 | end 40 | 41 | def self._parse_inputs_as_iterable(inputs) 42 | if inputs.empty? 43 | return [] 44 | end 45 | 46 | if inputs.length == 1 && inputs[0].is_a?(::Array) 47 | return inputs[0] 48 | end 49 | 50 | inputs 51 | end 52 | 53 | def self._parse_named_inputs(named_inputs, structify: false) 54 | named_inputs.map do |name, input| 55 | parse_into_expression(input, structify: structify)._alias(name.to_s) 56 | end 57 | end 58 | 59 | def self.parse_predicates_constraints_into_expression(*predicates, **constraints) 60 | all_predicates = _parse_positional_inputs(predicates) 61 | 62 | if constraints.any? 63 | constraint_predicates = _parse_constraints(constraints) 64 | all_predicates.concat(constraint_predicates) 65 | end 66 | 67 | _combine_predicates(all_predicates) 68 | end 69 | 70 | def self._parse_constraints(constraints) 71 | constraints.map do |name, value| 72 | Polars.col(name).eq(value)._rbexpr 73 | end 74 | end 75 | 76 | def self._combine_predicates(predicates) 77 | if !predicates.any? 
78 | msg = "at least one predicate or constraint must be provided" 79 | raise TypeError, msg 80 | end 81 | 82 | if predicates.length == 1 83 | return predicates[0] 84 | end 85 | 86 | Plr.all_horizontal(predicates) 87 | end 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /lib/polars/utils/various.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Utils 3 | def self._process_null_values(null_values) 4 | if null_values.is_a?(Hash) 5 | null_values.to_a 6 | else 7 | null_values 8 | end 9 | end 10 | 11 | def self._is_iterable_of(val, eltype) 12 | val.all? { |x| x.is_a?(eltype) } 13 | end 14 | 15 | def self.is_path_or_str_sequence(val) 16 | val.is_a?(::Array) && val.all? { |x| pathlike?(x) } 17 | end 18 | 19 | def self.is_bool_sequence(val) 20 | val.is_a?(::Array) && val.all? { |x| x == true || x == false } 21 | end 22 | 23 | def self.is_int_sequence(val) 24 | val.is_a?(::Array) && _is_iterable_of(val, Integer) 25 | end 26 | 27 | def self.is_str_sequence(val, allow_str: false) 28 | if allow_str == false && val.is_a?(::String) 29 | false 30 | else 31 | val.is_a?(::Array) && _is_iterable_of(val, ::String) 32 | end 33 | end 34 | 35 | def self.arrlen(obj) 36 | if obj.is_a?(Range) 37 | # size only works for numeric ranges 38 | obj.to_a.length 39 | elsif obj.is_a?(::String) 40 | nil 41 | else 42 | obj.length 43 | end 44 | rescue 45 | nil 46 | end 47 | 48 | def self.normalize_filepath(path, check_not_directory: true) 49 | path = File.expand_path(path) if !path.is_a?(::String) || path.start_with?("~") 50 | if check_not_directory && File.exist?(path) && Dir.exist?(path) 51 | raise ArgumentError, "Expected a file path; #{path} is a directory" 52 | end 53 | path 54 | end 55 | 56 | def self.scale_bytes(sz, to:) 57 | scaling_factor = { 58 | "b" => 1, 59 | "k" => 1024, 60 | "m" => 1024 ** 2, 61 | "g" => 1024 ** 3, 62 | "t" => 1024 ** 4 63 | }[to[0]] 64 | if 
scaling_factor > 1 65 | sz / scaling_factor.to_f 66 | else 67 | sz 68 | end 69 | end 70 | 71 | def self.extend_bool(value, n_match, value_name, match_name) 72 | values = bool?(value) ? [value] * n_match : value 73 | if n_match != values.length 74 | msg = "the length of `#{value_name}` (#{values.length}) does not match the length of `#{match_name}` (#{n_match})" 75 | raise ValueError, msg 76 | end 77 | values 78 | end 79 | end 80 | end 81 | -------------------------------------------------------------------------------- /lib/polars/utils/wrap.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | module Utils 3 | def self.wrap_df(df) 4 | DataFrame._from_rbdf(df) 5 | end 6 | 7 | def self.wrap_ldf(ldf) 8 | LazyFrame._from_rbldf(ldf) 9 | end 10 | 11 | def self.wrap_s(s) 12 | Series._from_rbseries(s) 13 | end 14 | 15 | def self.wrap_expr(rbexpr) 16 | Expr._from_rbexpr(rbexpr) 17 | end 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /lib/polars/version.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | # @private 3 | VERSION = "0.19.0" 4 | end 5 | -------------------------------------------------------------------------------- /lib/polars/whenthen.rb: -------------------------------------------------------------------------------- 1 | module Polars 2 | # @private 3 | class When 4 | attr_accessor :_when 5 | 6 | def initialize(rbwhen) 7 | self._when = rbwhen 8 | end 9 | 10 | def then(statement) 11 | statement_rbexpr = Utils.parse_into_expression(statement) 12 | Then.new(_when.then(statement_rbexpr)) 13 | end 14 | end 15 | 16 | # @private 17 | class Then < Expr 18 | attr_accessor :_then 19 | 20 | def initialize(rbthen) 21 | self._then = rbthen 22 | end 23 | 24 | def self._from_rbexpr(rbexpr) 25 | Utils.wrap_expr(rbexpr) 26 | end 27 | 28 | def _rbexpr 29 | _then.otherwise(Polars.lit(nil)._rbexpr) 30 | end 31 | 32 
# Polars::Then, continued.
    # Start another when/then branch from the given predicates and constraints.
    def when(*predicates, **constraints)
      condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
      ChainedWhen.new(_then.when(condition_rbexpr))
    end

    # Finish the chain with the value to use when no condition matched.
    def otherwise(statement)
      statement_rbexpr = Utils.parse_into_expression(statement)
      Utils.wrap_expr(_then.otherwise(statement_rbexpr))
    end
  end

  # @private
  class ChainedWhen
    attr_accessor :_chained_when

    def initialize(chained_when)
      self._chained_when = chained_when
    end

    # Attach the value to use when this branch's condition holds.
    def then(statement)
      statement_rbexpr = Utils.parse_into_expression(statement)
      ChainedThen.new(_chained_when.then(statement_rbexpr))
    end
  end

  # @private
  class ChainedThen < Expr
    attr_accessor :_chained_then

    def initialize(chained_then)
      self._chained_then = chained_then
    end

    def self._from_rbexpr(rbexpr)
      Utils.wrap_expr(rbexpr)
    end

    # Used as an expression without `otherwise`, the fallback is a nil literal.
    def _rbexpr
      _chained_then.otherwise(Polars.lit(nil)._rbexpr)
    end

    # Start another when/then branch from the given predicates and constraints.
    def when(*predicates, **constraints)
      condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
      ChainedWhen.new(_chained_then.when(condition_rbexpr))
    end

    # Finish the chain with the value to use when no condition matched.
    def otherwise(statement)
      statement_rbexpr = Utils.parse_into_expression(statement)
      Utils.wrap_expr(_chained_then.otherwise(statement_rbexpr))
    end
  end
end

--------------------------------------------------------------------------------
/polars-df.gemspec:
--------------------------------------------------------------------------------
require_relative "lib/polars/version"

Gem::Specification.new do |spec|
  spec.name = "polars-df"
  spec.version = Polars::VERSION
  spec.summary = "Blazingly fast DataFrames for Ruby"
  spec.homepage = "https://github.com/ankane/ruby-polars"
  spec.license = "MIT"

  spec.author = "Andrew Kane"
  spec.email = "andrew@ankane.org"

  # ship docs, the native extension sources, the Ruby library and Cargo files
  spec.files = Dir["*.{md,txt}", "{ext,lib}/**/*", "Cargo.*", ".yardopts"]
  spec.require_path = "lib"
  spec.extensions = ["ext/polars/extconf.rb"]

  spec.required_ruby_version = ">= 3.2"

  spec.add_dependency "bigdecimal"
  spec.add_dependency "rb_sys"
end

--------------------------------------------------------------------------------
/test/array_expr_test.rb:
--------------------------------------------------------------------------------
require_relative "test_helper"

class ArrayExprTest < Minitest::Test
  def test_min
    assert_expr arr_expr.min
  end

  def test_max
    assert_expr arr_expr.max
  end

  def test_sum
    assert_expr arr_expr.sum
  end

  # The `arr` namespace of column "a", shared by the tests above.
  def arr_expr
    Polars.col("a").arr
  end
end

--------------------------------------------------------------------------------
/test/arrow_test.rb:
--------------------------------------------------------------------------------
require_relative "test_helper"

class ArrowTest < Minitest::Test
  def test_c_stream
    df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})

    stream = df.arrow_c_stream
    assert_kind_of Polars::ArrowArrayStream, stream
    assert_kind_of Integer, stream.to_i

    assert_frame df, Polars::DataFrame.new(stream)

    # a stream can only be consumed once
    error = assert_raises(ArgumentError) do
      Polars::DataFrame.new(stream)
    end
    assert_equal "the C stream was already released", error.message
  end
end

--------------------------------------------------------------------------------
/test/avro_test.rb:
--------------------------------------------------------------------------------
require_relative "test_helper"

class AvroTest < Minitest::Test
  def test_read_avro
    df = Polars.read_avro("test/support/data.avro")
    expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    assert_frame expected, df
  end

  # write to a temporary path and read it back
  def test_write_avro
    df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    path = temp_path
    df.write_avro(path)
    assert_frame df, Polars.read_avro(path)
  end
end

--------------------------------------------------------------------------------
/test/cat_expr_test.rb:
--------------------------------------------------------------------------------
require_relative "test_helper"

class CatExprTest < Minitest::Test
  # The `cat` namespace of column "a".
  def cat_expr
    Polars.col("a").cat
  end
end

--------------------------------------------------------------------------------
/test/config_test.rb:
--------------------------------------------------------------------------------
require_relative "test_helper"

class ConfigTest < Minitest::Test
  # the setting applies inside the block only
  def test_set_tbl_rows
    s = Polars::Series.new(1..100)
    Polars::Config.new do |cfg|
      cfg.set_tbl_rows(100)
      refute_match "…", s.inspect
    end
    assert_match "…", s.inspect
  end

  def test_state
    assert_empty Polars::Config.state(if_set: true, env_only: true)
  end

  # same as test_set_tbl_rows, through the Polars.config shorthand
  def test_method
    s = Polars::Series.new(1..100)
    Polars.config do |cfg|
      cfg.set_tbl_rows(100)
      refute_match "…", s.inspect
    end
    assert_match "…", s.inspect
  end

  def test_thread_pool_size
    assert_kind_of Integer, Polars.thread_pool_size
  end
end

--------------------------------------------------------------------------------
/test/csv_test.rb:
--------------------------------------------------------------------------------
require_relative "test_helper"

class CsvTest < Minitest::Test
  def test_read_csv
    df = Polars.read_csv("test/support/data.csv")
    expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    assert_frame expected, df
  end

  def test_read_csv_file
    df = File.open("test/support/data.csv", "rb") { |f| Polars.read_csv(f) }
    expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    assert_frame expected, df
  end

  def test_read_csv_pathname
    require "pathname"

    df = Polars.read_csv(Pathname.new("test/support/data.csv"))
    expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    assert_frame expected, df
  end

  def test_read_csv_io
    io = StringIO.new(File.binread("test/support/data.csv"))
    df = Polars.read_csv(io)
    expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    assert_frame expected, df
  end

  def test_read_csv_uri
    require "uri"

    # skip remote call
    # Polars.read_csv(URI("https://..."))
  end

  def test_read_csv_http
    # skip remote call
    # Polars.read_csv("http://...")
  end

  def test_read_csv_https
    # skip remote call
    # Polars.read_csv("https://...")
  end

  # the glob matches the two data*.csv files, read as one frame
  def test_read_csv_glob
    expected = {
      a: [1, 2, 3, 4, 5],
      b: ["one", "two", "three", "four", "five"]
    }
    assert_frame expected, Polars.read_csv("test/support/data*.csv")
  end

  def test_read_csv_glob_mismatch
    # TODO use ComputeError
    error = assert_raises(Polars::Error) do
      Polars.read_csv("test/support/*.csv")
    end
    assert_match "schema lengths differ", error.message
  end

  def test_read_csv_batched
    reader = Polars.read_csv_batched("test/support/data.csv")
    batch = reader.next_batches(5)
    assert_equal 1, batch.size
    expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    assert_frame expected, batch[0]
    # the reader is exhausted after the first call
    assert_nil reader.next_batches(5)
  end

  def test_scan_csv
    df = Polars.scan_csv("test/support/data.csv")
    expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]})
    assert_frame expected, df.collect
77 | end 78 | 79 | def test_scan_csv_io 80 | io = StringIO.new(File.binread("test/support/data.csv")) 81 | df = Polars.scan_csv(io) 82 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 83 | assert_frame expected, df.collect 84 | end 85 | 86 | def test_write_csv 87 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 88 | path = temp_path 89 | assert_nil df.write_csv(path) 90 | assert_equal "a,b\n1,one\n2,two\n3,three\n", File.read(path) 91 | end 92 | 93 | def test_write_csv_to_string 94 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 95 | assert_equal "a,b\n1,one\n2,two\n3,three\n", df.write_csv 96 | end 97 | 98 | def test_to_csv 99 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 100 | assert_equal "a,b\n1,one\n2,two\n3,three\n", df.to_csv 101 | end 102 | 103 | def test_sink_csv 104 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 105 | path = temp_path 106 | assert_nil df.lazy.sink_csv(path) 107 | assert_equal "a,b\n1,one\n2,two\n3,three\n", File.read(path) 108 | assert_frame df, Polars.read_csv(path) 109 | end 110 | 111 | def test_sink_csv_io 112 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 113 | io = StringIO.new 114 | assert_nil df.lazy.sink_csv(io) 115 | io.rewind 116 | assert_equal "a,b\n1,one\n2,two\n3,three\n", io.read 117 | end 118 | 119 | def test_has_header_true 120 | df = Polars.read_csv("test/support/data.csv", has_header: true) 121 | assert_equal ["a", "b"], df.columns 122 | assert_equal 3, df.height 123 | end 124 | 125 | def test_has_header_false 126 | df = Polars.read_csv("test/support/data.csv", has_header: false) 127 | assert_equal ["column_1", "column_2"], df.columns 128 | assert_equal 4, df.height 129 | end 130 | end 131 | -------------------------------------------------------------------------------- /test/date_time_expr_test.rb: 
-------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class DateTimeExprTest < Minitest::Test 4 | def test_strftime 5 | assert_expr dt_expr.strftime("fmt") 6 | end 7 | 8 | def test_year 9 | assert_expr dt_expr.year 10 | end 11 | 12 | def test_iso_year 13 | assert_expr dt_expr.iso_year 14 | end 15 | 16 | def test_quarter 17 | assert_expr dt_expr.quarter 18 | end 19 | 20 | def test_month 21 | assert_expr dt_expr.month 22 | end 23 | 24 | def test_week 25 | assert_expr dt_expr.week 26 | end 27 | 28 | def test_weekday 29 | assert_expr dt_expr.weekday 30 | end 31 | 32 | def test_day 33 | assert_expr dt_expr.day 34 | end 35 | 36 | def test_ordinal_day 37 | assert_expr dt_expr.ordinal_day 38 | end 39 | 40 | def test_hour 41 | assert_expr dt_expr.hour 42 | end 43 | 44 | def test_minute 45 | assert_expr dt_expr.minute 46 | end 47 | 48 | def test_second 49 | assert_expr dt_expr.second 50 | end 51 | 52 | def test_millisecond 53 | assert_expr dt_expr.millisecond 54 | end 55 | 56 | def test_microsecond 57 | assert_expr dt_expr.microsecond 58 | end 59 | 60 | def test_nanosecond 61 | assert_expr dt_expr.nanosecond 62 | end 63 | 64 | def test_epoch 65 | assert_expr dt_expr.epoch 66 | assert_expr dt_expr.epoch("s") 67 | assert_expr dt_expr.epoch("d") 68 | end 69 | 70 | def test_timestamp 71 | assert_expr dt_expr.timestamp 72 | end 73 | 74 | def test_with_time_unit 75 | assert_expr dt_expr.with_time_unit("us") 76 | end 77 | 78 | def test_cast_time_unit 79 | assert_expr dt_expr.cast_time_unit("us") 80 | end 81 | 82 | def test_convert_time_zone 83 | assert_expr dt_expr.convert_time_zone("Etc/UTC") 84 | end 85 | 86 | def test_replace_time_zone 87 | assert_expr dt_expr.replace_time_zone("Etc/UTC") 88 | end 89 | 90 | def test_days 91 | assert_expr dt_expr.days 92 | end 93 | 94 | def test_hours 95 | assert_expr dt_expr.hours 96 | end 97 | 98 | def test_minutes 99 | assert_expr dt_expr.minutes 100 | end 101 | 102 | def 
test_seconds 103 | assert_expr dt_expr.seconds 104 | end 105 | 106 | def test_milliseconds 107 | assert_expr dt_expr.milliseconds 108 | end 109 | 110 | def test_microseconds 111 | assert_expr dt_expr.microseconds 112 | end 113 | 114 | def test_nanoseconds 115 | assert_expr dt_expr.nanoseconds 116 | end 117 | 118 | def test_offset_by 119 | assert_expr dt_expr.offset_by("1y") 120 | end 121 | 122 | def dt_expr 123 | Polars.col("a").dt 124 | end 125 | end 126 | -------------------------------------------------------------------------------- /test/delta_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class DeltaTest < Minitest::Test 4 | def setup 5 | skip unless ENV["TEST_DELTA"] 6 | end 7 | 8 | def test_read_delta 9 | df = Polars.read_delta("test/support/delta") 10 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 11 | assert_frame expected, df 12 | end 13 | 14 | def test_read_delta_table 15 | df = Polars.read_delta(DeltaLake::Table.new("test/support/delta")) 16 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 17 | assert_frame expected, df 18 | end 19 | 20 | def test_read_delta_columns 21 | df = Polars.read_delta("test/support/delta", columns: ["a"]) 22 | expected = Polars::DataFrame.new({"a" => [1, 2, 3]}) 23 | assert_frame expected, df 24 | end 25 | 26 | def test_read_delta_columns_order 27 | df = Polars.read_delta("test/support/delta", columns: ["b", "a"]) 28 | assert_equal ["b", "a"], df.columns 29 | end 30 | 31 | def test_scan_delta 32 | lf = Polars.scan_delta("test/support/delta") 33 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 34 | assert_frame expected, lf.collect 35 | end 36 | 37 | def test_scan_delta_table 38 | lf = Polars.scan_delta(DeltaLake::Table.new("test/support/delta")) 39 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", 
"three"]}) 40 | assert_frame expected, lf.collect 41 | end 42 | 43 | def test_write_delta 44 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 45 | path = temp_path 46 | assert_nil df.write_delta(path) 47 | assert_equal df, Polars.read_delta(path) 48 | end 49 | 50 | def test_write_delta_mode_error 51 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 52 | path = temp_path 53 | assert_nil df.write_delta(path) 54 | assert_raises(DeltaLake::Error) do 55 | df.write_delta(path, mode: "error") 56 | end 57 | end 58 | 59 | def test_write_delta_mode_append 60 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 61 | df2 = Polars::DataFrame.new({"a" => [4, 5, 6], "b" => ["four", "five", "six"]}) 62 | path = temp_path 63 | assert_nil df.write_delta(path) 64 | assert_nil df2.write_delta(path, mode: "append") 65 | assert_equal Polars.concat([df, df2]), Polars.read_delta(path) 66 | end 67 | 68 | def test_write_delta_mode_overwrite 69 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 70 | df2 = Polars::DataFrame.new({"a" => [4, 5, 6], "b" => ["four", "five", "six"]}) 71 | path = temp_path 72 | assert_nil df.write_delta(path) 73 | assert_nil df2.write_delta(path, mode: "overwrite") 74 | assert_equal df2, Polars.read_delta(path) 75 | assert_equal df2, Polars.read_delta(path, version: 1) 76 | assert_equal df2, Polars.scan_delta(path, version: 1).collect 77 | assert_equal df, Polars.read_delta(path, version: 0) 78 | assert_equal df, Polars.scan_delta(path, version: 0).collect 79 | end 80 | 81 | def test_write_delta_mode_ignore 82 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 83 | df2 = Polars::DataFrame.new({"a" => [4, 5, 6], "b" => ["four", "five", "six"]}) 84 | path = temp_path 85 | assert_nil df.write_delta(path) 86 | assert_nil df2.write_delta(path, mode: "ignore") 87 | assert_equal df, Polars.read_delta(path) 88 | end 89 | 90 
| def test_write_delta_mode_merge 91 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 92 | df2 = Polars::DataFrame.new({"a" => [2, 3, 4], "b" => ["four", "five", "six"]}) 93 | path = temp_path 94 | assert_nil df.write_delta(path) 95 | delta_merge_options = { 96 | predicate: "target.a = source.a", 97 | source_alias: "source", 98 | target_alias: "target" 99 | } 100 | df2.write_delta(path, mode: "merge", delta_merge_options: delta_merge_options) 101 | .when_matched_update({"a" => "source.a", "b" => "source.b"}) 102 | .execute 103 | expected = Polars::DataFrame.new({"a" => [2, 3, 1], "b" => ["four", "five", "one"]}) 104 | assert_equal expected, Polars.read_delta(path) 105 | end 106 | end 107 | -------------------------------------------------------------------------------- /test/expr_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class ExprTest < Minitest::Test 4 | def test_lit 5 | assert_lit "true", Polars.lit(true) 6 | assert_lit "false", Polars.lit(false) 7 | assert_lit "null", Polars.lit(nil) 8 | assert_lit "dyn int: 1", Polars.lit(1) 9 | assert_lit "dyn int: 1.strict_cast(Int8)", Polars.lit(1, dtype: Polars::Int8) 10 | assert_lit "dyn float: 1.5", Polars.lit(1.5) 11 | assert_lit "\"hello\"", Polars.lit("hello") 12 | assert_lit "b\"hello\"", Polars.lit("hello".b) 13 | assert_lit "Series", Polars.lit(Polars::Series.new([1, 2, 3])) 14 | assert_lit "Series[a]", Polars.lit(Polars::Series.new("a", [1, 2, 3])) 15 | assert_lit "[]", Polars.lit([]) 16 | assert_lit "[1, 2, 3]", Polars.lit([1, 2, 3]) 17 | assert_lit "[1, 2, 3]", Polars.lit(Numo::NArray.cast([1, 2, 3])) 18 | assert_lit "dyn int: 1640995200000000000.strict_cast(Datetime(Nanoseconds, None)).strict_cast(Date)", Polars.lit(Date.new(2022, 1, 1)) 19 | assert_lit "dyn int: 1640995200000000000.strict_cast(Datetime(Nanoseconds, None))", Polars.lit(Time.utc(2022, 1, 1)) 20 | assert_lit "dyn int: 
1640995200000000000.strict_cast(Datetime(Nanoseconds, None))", Polars.lit(DateTime.new(2022, 1, 1)) 21 | 22 | error = assert_raises(ArgumentError) do 23 | Polars.lit(Object.new) 24 | end 25 | assert_match "could not convert value", error.message 26 | end 27 | 28 | def test_min 29 | df = Polars::DataFrame.new({"a" => [1, 5, 3], "b" => [4, 2, 6]}) 30 | assert_frame ({"a" => [1]}), df.select(Polars.min("a")) 31 | assert_frame ({"a" => [1], "b" => [2]}), df.select(Polars.min(["a", "b"])) 32 | end 33 | 34 | def test_get 35 | df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4], "c" => [5, 6]}) 36 | error = assert_raises(TypeError) do 37 | df.select(Polars.nth(1, "a")) 38 | end 39 | assert_equal "no implicit conversion of String into Integer", error.message 40 | assert_frame ({"a" => [2]}), df.select(Polars.col("a").get(1)) 41 | end 42 | 43 | def assert_lit(expected, lit) 44 | assert_equal expected, lit.inspect 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /test/ipc_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class IpcTest < Minitest::Test 4 | def test_read_ipc 5 | df = Polars.read_ipc("test/support/data.arrow") 6 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 7 | assert_frame expected, df 8 | end 9 | 10 | def test_read_ipc_file 11 | df = File.open("test/support/data.arrow", "rb") { |f| Polars.read_ipc(f) } 12 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 13 | assert_frame expected, df 14 | end 15 | 16 | def test_scan_ipc 17 | df = Polars.scan_ipc("test/support/data.arrow") 18 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 19 | assert_frame expected, df.collect 20 | end 21 | 22 | def test_scan_ipc_file 23 | df = File.open("test/support/data.arrow", "rb") { |f| Polars.scan_ipc(f) } 24 | expected = 
Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 25 | assert_frame expected, df.collect 26 | end 27 | 28 | def test_read_ipc_schema 29 | schema = Polars.read_ipc_schema("test/support/data.arrow") 30 | assert_equal ({"a" => Polars::Int64, "b" => Polars::String}), schema 31 | end 32 | 33 | def test_write_ipc 34 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 35 | path = temp_path 36 | df.write_ipc(path) 37 | assert_frame df, Polars.read_ipc(path) 38 | end 39 | 40 | def test_write_ipc_io 41 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 42 | io = StringIO.new 43 | df.write_ipc(io) 44 | io.rewind 45 | assert_frame df, Polars.read_ipc(io) 46 | end 47 | 48 | def test_write_ipc_to_string 49 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 50 | output = df.write_ipc(nil) 51 | assert output.start_with?("ARROW") 52 | assert_equal Encoding::BINARY, output.encoding 53 | end 54 | 55 | def test_write_ipc_stream 56 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 57 | output = df.write_ipc_stream(nil) 58 | assert_equal Encoding::BINARY, output.encoding 59 | assert_equal df, Polars.read_ipc_stream(StringIO.new(output)) 60 | end 61 | 62 | def test_sink_ipc 63 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 64 | path = temp_path 65 | assert_nil df.lazy.sink_ipc(path) 66 | assert_frame df, Polars.read_ipc(path, memory_map: false) 67 | end 68 | 69 | def test_sink_ipc_io 70 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 71 | io = StringIO.new 72 | assert_nil df.lazy.sink_ipc(io) 73 | io.rewind 74 | assert_frame df, Polars.read_ipc(io) 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /test/json_test.rb: -------------------------------------------------------------------------------- 1 | require_relative 
"test_helper" 2 | 3 | class JsonTest < Minitest::Test 4 | def test_read_json 5 | df = Polars.read_json("test/support/data.json") 6 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 7 | assert_frame expected, df 8 | end 9 | 10 | def test_read_json_pathname 11 | require "pathname" 12 | 13 | df = Polars.read_json(Pathname.new("test/support/data.json")) 14 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 15 | assert_frame expected, df 16 | end 17 | 18 | def test_read_json_io 19 | io = StringIO.new(File.binread("test/support/data.json")) 20 | df = Polars.read_json(io) 21 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 22 | assert_frame expected, df 23 | end 24 | 25 | def test_write_json 26 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 27 | path = temp_path 28 | assert_nil df.write_json(path) 29 | end 30 | 31 | def test_write_json_pathname 32 | require "pathname" 33 | 34 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 35 | assert_nil df.write_json(Pathname.new(temp_path)) 36 | end 37 | 38 | def test_write_json_io 39 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 40 | io = StringIO.new 41 | df.write_json(io) 42 | io.rewind 43 | assert_frame df, Polars.read_json(io) 44 | end 45 | 46 | def test_write_json_nil 47 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 48 | io = StringIO.new(df.write_json) 49 | assert_frame df, Polars.read_json(io) 50 | end 51 | 52 | def test_read_ndjson 53 | df = Polars.read_ndjson("test/support/data.ndjson") 54 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 55 | assert_frame expected, df 56 | end 57 | 58 | def test_read_ndjson_pathname 59 | require "pathname" 60 | 61 | df = Polars.read_ndjson(Pathname.new("test/support/data.ndjson")) 62 | expected = 
Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 63 | assert_frame expected, df 64 | end 65 | 66 | def test_read_ndjson_io 67 | io = StringIO.new(File.binread("test/support/data.ndjson")) 68 | df = Polars.read_ndjson(io) 69 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 70 | assert_frame expected, df 71 | end 72 | 73 | def test_scan_ndjson 74 | df = Polars.scan_ndjson("test/support/data.ndjson") 75 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 76 | assert_frame expected, df.collect 77 | end 78 | 79 | def test_scan_ndjson_io 80 | io = StringIO.new(File.binread("test/support/data.ndjson")) 81 | df = Polars.scan_ndjson(io) 82 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 83 | assert_frame expected, df.collect 84 | end 85 | 86 | def test_scan_ndjson_cloud 87 | skip unless cloud? 88 | 89 | df = Polars.scan_ndjson(cloud_file("data.ndjson")) 90 | expected = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 91 | assert_frame expected, df.collect 92 | end 93 | 94 | def test_write_ndjson 95 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 96 | path = temp_path 97 | assert_nil df.write_ndjson(path) 98 | assert_frame df, Polars.read_ndjson(path) 99 | end 100 | 101 | def test_write_ndjson_pathname 102 | require "pathname" 103 | 104 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 105 | assert_nil df.write_ndjson(Pathname.new(temp_path)) 106 | end 107 | 108 | def test_write_ndjson_io 109 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 110 | io = StringIO.new 111 | df.write_ndjson(io) 112 | io.rewind 113 | assert_frame df, Polars.read_ndjson(io) 114 | end 115 | 116 | def test_write_ndjson_nil 117 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 118 | io = StringIO.new(df.write_ndjson) 
119 | assert_frame df, Polars.read_ndjson(io) 120 | end 121 | 122 | def test_sink_ndjson 123 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 124 | path = temp_path 125 | assert_nil df.lazy.sink_ndjson(path) 126 | assert_frame df, Polars.read_ndjson(path) 127 | end 128 | 129 | def test_sink_ndjson_io 130 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 131 | io = StringIO.new 132 | assert_nil df.lazy.sink_ndjson(io) 133 | io.rewind 134 | assert_frame df, Polars.read_ndjson(io) 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /test/lazy_frame_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class LazyFrameTest < Minitest::Test 4 | def test_to_s 5 | df = Polars::DataFrame.new({"a" => [1, 2, 3]}).lazy 6 | assert_match "naive plan:", df.select("a").to_s 7 | end 8 | 9 | def test_select 10 | df = Polars::DataFrame.new( 11 | { 12 | "foo" => [1, 2, 3], 13 | "bar" => [6, 7, 8], 14 | "ham" => ["a", "b", "c"] 15 | } 16 | ).lazy 17 | df.select("foo").collect 18 | df.select(["foo", "bar"]).collect 19 | df.select(Polars.col("foo") + 1).collect 20 | end 21 | 22 | def test_unnest 23 | df = ( 24 | Polars::DataFrame.new( 25 | { 26 | "before" => ["foo", "bar"], 27 | "t_a" => [1, 2], 28 | "t_b" => ["a", "b"], 29 | "t_c" => [true, nil], 30 | "t_d" => [[1, 2], [3]], 31 | "after" => ["baz", "womp"] 32 | } 33 | ) 34 | .lazy 35 | .select( 36 | ["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"] 37 | ) 38 | ) 39 | df.fetch 40 | df.unnest("t_struct").fetch 41 | end 42 | 43 | def test_write_json 44 | df = Polars::DataFrame.new( 45 | { 46 | "foo" => [1, 2, 3], 47 | "bar" => [6, 7, 8], 48 | "ham" => ["a", "b", "c"] 49 | } 50 | ).lazy 51 | path = temp_path 52 | df.select("foo").write_json(path) 53 | assert_frame df.select("foo").collect, 
Polars::LazyFrame.read_json(path).collect 54 | end 55 | 56 | def test_pearson_corr 57 | df = Polars::DataFrame.new({ 58 | a: [1, 2, 3, 4], 59 | b: [2, 4, 6, 7] 60 | }) 61 | .lazy 62 | .select( 63 | Polars.corr("a", "b", method: "pearson") 64 | ) 65 | .collect 66 | assert_in_delta 0.989778, df["a"][0] 67 | end 68 | 69 | def test_describe_optimized_plan 70 | df = Polars::DataFrame.new({"a" => [1, 2, 3]}).lazy 71 | assert_match "PROJECT", df.select("a").describe_optimized_plan 72 | end 73 | 74 | def test_concat 75 | df1 = Polars::LazyFrame.new({"a" => [1], "b" => [3]}) 76 | df2 = Polars::LazyFrame.new({"a" => [2], "b" => [4]}) 77 | Polars.concat([df1, df2]) 78 | Polars.concat([df1, df2], how: "vertical_relaxed") 79 | Polars.concat([df1, df2], how: "diagonal") 80 | end 81 | 82 | def test_concat_horizontal 83 | df1 = Polars::LazyFrame.new({"a" => [1, 2]}) 84 | df2 = Polars::LazyFrame.new({"b" => [3, 4]}) 85 | df = Polars.concat([df1, df2], how: "horizontal").collect 86 | expected = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]}) 87 | assert_frame expected, df 88 | end 89 | end 90 | -------------------------------------------------------------------------------- /test/list_expr_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class ListExprTest < Minitest::Test 4 | def test_lengths 5 | assert_expr list_expr.lengths 6 | end 7 | 8 | def test_sum 9 | assert_expr list_expr.sum 10 | end 11 | 12 | def test_max 13 | assert_expr list_expr.max 14 | end 15 | 16 | def test_min 17 | assert_expr list_expr.min 18 | end 19 | 20 | def test_mean 21 | assert_expr list_expr.mean 22 | end 23 | 24 | def test_sort 25 | assert_expr list_expr.sort 26 | end 27 | 28 | def test_reverse 29 | assert_expr list_expr.reverse 30 | end 31 | 32 | def test_unique 33 | assert_expr list_expr.unique 34 | end 35 | 36 | def test_get 37 | assert_expr list_expr.get(0) 38 | end 39 | 40 | def test_getitem 41 | assert_expr 
list_expr[0] 42 | end 43 | 44 | def test_first 45 | assert_expr list_expr.first 46 | end 47 | 48 | def test_last 49 | assert_expr list_expr.last 50 | end 51 | 52 | def test_contains 53 | assert_expr list_expr.contains(0) 54 | end 55 | 56 | def test_join 57 | assert_expr list_expr.join(",") 58 | end 59 | 60 | def test_arg_min 61 | assert_expr list_expr.arg_min 62 | end 63 | 64 | def test_arg_max 65 | assert_expr list_expr.arg_max 66 | end 67 | 68 | def test_diff 69 | assert_expr list_expr.diff 70 | end 71 | 72 | def test_shift 73 | assert_expr list_expr.shift 74 | end 75 | 76 | def test_slice 77 | assert_expr list_expr.slice(0) 78 | end 79 | 80 | def test_head 81 | assert_expr list_expr.head 82 | end 83 | 84 | def test_tail 85 | assert_expr list_expr.tail 86 | end 87 | 88 | def test_eval 89 | rank_pct = Polars.element.rank(reverse: true) / Polars.col("").count 90 | assert_expr list_expr.eval(rank_pct) 91 | end 92 | 93 | def list_expr 94 | Polars.col("a").list 95 | end 96 | end 97 | -------------------------------------------------------------------------------- /test/list_name_space_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class ListNameSpaceTest < Minitest::Test 4 | def test_get 5 | s = Polars::Series.new([[0, 1, 2], [0]]) 6 | 7 | error = assert_raises(Polars::ComputeError) do 8 | s.list.get(1) 9 | end 10 | assert_equal "get index is out of bounds", error.message 11 | 12 | s.list.get(1, null_on_oob: true) 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /test/meta_expr_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class MetaExprTest < Minitest::Test 4 | def test_equal 5 | assert(meta_expr == meta_expr) 6 | end 7 | 8 | def test_not_equal 9 | refute(meta_expr != meta_expr) 10 | end 11 | 12 | def test_pop 13 | assert_empty meta_expr.pop 
14 | end 15 | 16 | def test_root_names 17 | assert_equal ["a"], meta_expr.root_names 18 | end 19 | 20 | def test_output_name 21 | assert_equal "a", meta_expr.output_name 22 | end 23 | 24 | def test_undo_aliases 25 | assert_equal Polars.col("a"), meta_expr.undo_aliases 26 | end 27 | 28 | def meta_expr 29 | Polars.col("a").meta 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /test/numo_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class NumoTest < Minitest::Test 4 | def test_roundtrip 5 | assert_roundtrip Numo::Int8 6 | assert_roundtrip Numo::Int16 7 | assert_roundtrip Numo::Int32 8 | assert_roundtrip Numo::Int64 9 | assert_roundtrip Numo::UInt8 10 | assert_roundtrip Numo::UInt16 11 | assert_roundtrip Numo::UInt32 12 | assert_roundtrip Numo::UInt64 13 | assert_roundtrip Numo::SFloat 14 | assert_roundtrip Numo::DFloat 15 | end 16 | 17 | def test_series_int 18 | s = Polars::Series.new([1, 2, 3]) 19 | assert_kind_of Numo::Int64, s.to_numo 20 | assert_equal s.to_a, s.to_numo.to_a 21 | end 22 | 23 | def test_series_int_nil 24 | s = Polars::Series.new([1, nil, 3]) 25 | assert_kind_of Numo::DFloat, s.to_numo 26 | assert s.to_numo[1].nan? 
27 | end 28 | 29 | def test_series_float 30 | s = Polars::Series.new([1.5, 2.5, 3.5]) 31 | assert_kind_of Numo::DFloat, s.to_numo 32 | assert_equal s.to_a, s.to_numo.to_a 33 | end 34 | 35 | def test_series_bool 36 | s = Polars::Series.new([true, false, true]) 37 | assert_kind_of Numo::Bit, s.to_numo 38 | assert_equal [1, 0, 1], s.to_numo.to_a 39 | end 40 | 41 | def test_series_bool_nil 42 | s = Polars::Series.new([true, false, nil]) 43 | assert_kind_of Numo::RObject, s.to_numo 44 | assert_equal [true, false, nil], s.to_numo.to_a 45 | end 46 | 47 | def test_series_str 48 | s = Polars::Series.new(["one", nil, "three"]) 49 | assert_kind_of Numo::RObject, s.to_numo 50 | assert_equal s.to_a, s.to_numo.to_a 51 | end 52 | 53 | def test_series_date 54 | today = Date.today 55 | s = Polars::Series.new([today - 2, nil, today]) 56 | assert_kind_of Numo::RObject, s.to_numo 57 | assert_equal s.to_a, s.to_numo.to_a 58 | end 59 | 60 | def test_series_2d 61 | s = Polars::Series.new(Numo::Int64.cast([[1, 2], [3, 4]])) 62 | assert_series [[1, 2], [3, 4]], s, dtype: Polars::Array.new(Polars::Int64, 2) 63 | end 64 | 65 | def test_data_frame 66 | df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["one", "two", "three"]}) 67 | assert_kind_of Numo::RObject, df.to_numo 68 | assert_equal [[1, "one"], [2, "two"], [3, "three"]], df.to_numo.to_a 69 | end 70 | 71 | def assert_roundtrip(cls) 72 | v = cls.cast([1, 2, 3]) 73 | assert_equal v, Polars::Series.new(v).to_numo 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /test/plot_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class PlotTest < Minitest::Test 4 | def test_default_type_column 5 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3]}) 6 | assert_plot_type "column", df.plot("a", "b") 7 | end 8 | 9 | def test_default_type_scatter 10 | df = Polars::DataFrame.new({"a" => 
[1, 2, 3], "b" => [4, 5, 6]}) 11 | assert_plot_type "scatter", df.plot("a", "b") 12 | end 13 | 14 | def test_default_type_line 15 | today = Date.today 16 | df = Polars::DataFrame.new({"a" => [today - 2, today - 1, today], "b" => [1, 2, 3]}) 17 | assert_plot_type "line", df.plot("a", "b") 18 | end 19 | 20 | def test_default_columns 21 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3]}) 22 | assert_plot_type "column", df.plot 23 | end 24 | 25 | def test_default_columns_not_two 26 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"]}) 27 | error = assert_raises(ArgumentError) do 28 | df.plot 29 | end 30 | assert_equal "Must specify columns", error.message 31 | end 32 | 33 | def test_type 34 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3]}) 35 | assert_plot_type "pie", df.plot("a", "b", type: "pie") 36 | assert_plot_type "line", df.plot("a", "b", type: "line") 37 | assert_plot_type "column", df.plot("a", "b", type: "column") 38 | assert_plot_type "bar", df.plot("a", "b", type: "bar") 39 | assert_plot_type "area", df.plot("a", "b", type: "area") 40 | assert_plot_type "scatter", df.plot("b", "b", type: "scatter") 41 | end 42 | 43 | def test_group_option 44 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3], "c" => ["group1", "group1", "group2"]}) 45 | assert_group df.plot("a", "b", type: "line", group: "c") 46 | assert_group df.plot("a", "b", type: "column", group: "c") 47 | assert_group df.plot("a", "b", type: "bar", group: "c") 48 | assert_group df.plot("a", "b", type: "area", group: "c") 49 | assert_group df.plot("b", "b", type: "scatter", group: "c") 50 | end 51 | 52 | def test_group_option_pie 53 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3], "c" => ["group1", "group1", "group2"]}) 54 | error = assert_raises(ArgumentError) do 55 | df.plot("a", "b", type: "pie", group: "c") 56 | end 57 | assert_equal "Cannot use group option with pie 
chart", error.message 58 | end 59 | 60 | def test_group_method 61 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3], "c" => ["group1", "group1", "group2"]}) 62 | assert_group df.group_by("c").plot("a", "b", type: "line") 63 | assert_group df.group_by("c").plot("a", "b", type: "column") 64 | assert_group df.group_by("c").plot("a", "b", type: "bar") 65 | assert_group df.group_by("c").plot("a", "b", type: "area") 66 | assert_group df.group_by("c").plot("b", "b", type: "scatter") 67 | end 68 | 69 | def test_group_method_multiple_columns 70 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3], "c" => ["group1", "group1", "group2"]}) 71 | error = assert_raises(ArgumentError) do 72 | df.group_by(["c", "c"]).plot("a", "b") 73 | end 74 | assert_equal "Multiple groups not supported", error.message 75 | end 76 | 77 | def test_group_method_group_option 78 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"], "b" => [1, 2, 3], "c" => ["group1", "group1", "group2"]}) 79 | error = assert_raises(ArgumentError) do 80 | df.group_by("c").plot("a", "b", group: "c") 81 | end 82 | assert_equal "unknown keyword: :group", error.message 83 | end 84 | 85 | def test_type_unknown 86 | df = Polars::DataFrame.new({"a" => ["one", "two", "three"]}) 87 | error = assert_raises do 88 | df.plot("a", "a") 89 | end 90 | assert_equal "Cannot determine type. 
Use the type option.", error.message 91 | end 92 | 93 | def assert_plot_type(expected, plot) 94 | assert_kind_of Vega::LiteChart, plot 95 | 96 | case expected 97 | when "column" 98 | assert_equal "bar", plot.spec[:mark][:type] 99 | when "pie" 100 | assert_equal "arc", plot.spec[:mark][:type] 101 | when "scatter" 102 | assert_equal "circle", plot.spec[:mark][:type] 103 | else 104 | assert_equal expected, plot.spec[:mark][:type] 105 | end 106 | end 107 | 108 | def assert_group(plot) 109 | assert_kind_of Vega::LiteChart, plot 110 | assert_equal "c", plot.spec[:encoding][:color][:field] 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /test/selectors_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class SelectorsTest < Minitest::Test 4 | def test_inspect 5 | assert_inspect "Polars.cs.numeric()", Polars.cs.numeric 6 | assert_inspect "~Polars.cs.numeric()", ~Polars.cs.numeric 7 | assert_inspect "(Polars.cs.all() - Polars.cs.numeric())", Polars.cs.all - Polars.cs.numeric 8 | assert_inspect "(Polars.cs.float() & Polars.cs.integer())", Polars.cs.float & Polars.cs.integer 9 | assert_inspect "(Polars.cs.float() | Polars.cs.integer())", Polars.cs.float | Polars.cs.integer 10 | assert_inspect "(Polars.cs.float() ^ Polars.cs.integer())", Polars.cs.float ^ Polars.cs.integer 11 | end 12 | 13 | def test_starts_with_escape 14 | df = Polars::DataFrame.new({".+" => [1], "a" => [2]}) 15 | assert_equal [".+"], df.select(Polars.cs.starts_with(".+")).columns 16 | end 17 | 18 | def assert_inspect(expected, obj) 19 | assert_equal expected, obj.inspect 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /test/sql_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class SqlTest < Minitest::Test 4 | def test_works 5 | lf 
= Polars::LazyFrame.new({"a" => [1, 2, 3], "b" => ["x", nil, "z"]}) 6 | res = Polars::SQLContext.new(frame: lf).execute( 7 | "SELECT b, a*2 AS two_a FROM frame WHERE b IS NOT NULL" 8 | ) 9 | expected = Polars::DataFrame.new({"b" => ["x", "z"], "two_a" => [2, 6]}) 10 | assert_frame expected, res.collect 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /test/string_cache_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class StringCacheTest < Minitest::Test 4 | def test_works 5 | refute Polars.using_string_cache 6 | Polars::StringCache.new do 7 | assert Polars.using_string_cache 8 | end 9 | refute Polars.using_string_cache 10 | end 11 | 12 | def test_no_block 13 | error = assert_raises(LocalJumpError) do 14 | Polars::StringCache.new 15 | end 16 | assert_equal "no block given", error.message 17 | end 18 | 19 | def test_method 20 | Polars.string_cache do 21 | assert Polars.using_string_cache 22 | end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /test/string_expr_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class StringExprTest < Minitest::Test 4 | def test_lengths 5 | assert_expr str_expr.lengths 6 | end 7 | 8 | def test_n_chars 9 | assert_expr str_expr.n_chars 10 | end 11 | 12 | def test_concat 13 | assert_expr str_expr.concat 14 | end 15 | 16 | def test_to_uppercase 17 | assert_expr str_expr.to_uppercase 18 | end 19 | 20 | def test_to_lowercase 21 | assert_expr str_expr.to_lowercase 22 | end 23 | 24 | def test_strip 25 | assert_expr str_expr.strip 26 | end 27 | 28 | def test_lstrip 29 | assert_expr str_expr.lstrip 30 | end 31 | 32 | def test_rstrip 33 | assert_expr str_expr.rstrip 34 | end 35 | 36 | def test_zfill 37 | assert_expr str_expr.zfill(1) 38 | end 39 | 40 | def test_ljust 41 | 
assert_expr str_expr.ljust(1) 42 | end 43 | 44 | def test_rjust 45 | assert_expr str_expr.rjust(1) 46 | end 47 | 48 | def test_contains 49 | assert_expr str_expr.contains("pattern") 50 | end 51 | 52 | def test_ends_with 53 | assert_expr str_expr.ends_with("sub") 54 | end 55 | 56 | def test_starts_with 57 | assert_expr str_expr.starts_with("sub") 58 | end 59 | 60 | def test_extract 61 | assert_expr str_expr.extract("pattern") 62 | end 63 | 64 | def test_extract_all 65 | assert_expr str_expr.extract_all("pattern") 66 | end 67 | 68 | def test_count_match 69 | assert_expr str_expr.count_match("pattern") 70 | end 71 | 72 | def test_split 73 | assert_expr str_expr.split("by") 74 | assert_expr str_expr.split("by", inclusive: true) 75 | end 76 | 77 | def test_split_exact 78 | assert_expr str_expr.split_exact("by", 1) 79 | assert_expr str_expr.split_exact("by", 1, inclusive: true) 80 | end 81 | 82 | def test_splitn 83 | assert_expr str_expr.splitn("by", 1) 84 | end 85 | 86 | def test_replace 87 | assert_expr str_expr.replace("pattern", "value") 88 | end 89 | 90 | def test_replace_all 91 | assert_expr str_expr.replace_all("pattern", "value") 92 | end 93 | 94 | def test_slice 95 | assert_expr str_expr.slice(1) 96 | end 97 | 98 | def str_expr 99 | Polars.col("a").str 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /test/string_name_space_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class StringNameSpaceTest < Minitest::Test 4 | def test_to_datetime 5 | s = Polars::Series.new(["2022-08-31 00:00:00.123456789"]) 6 | assert_equal "us", s.str.to_datetime("%Y-%m-%d %H:%M:%S%.f").dtype.time_unit 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /test/struct_expr_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 
3 | class StructExprTest < Minitest::Test 4 | def test_get 5 | assert_expr struct_expr[0] 6 | assert_expr struct_expr["a"] 7 | 8 | error = assert_raises(ArgumentError) do 9 | struct_expr[Object.new] 10 | end 11 | assert_equal "expected type Integer or String, got Object", error.message 12 | end 13 | 14 | def test_field 15 | assert_expr struct_expr.field("a") 16 | end 17 | 18 | def test_rename_fields 19 | assert_expr struct_expr.rename_fields(["a"]) 20 | end 21 | 22 | def struct_expr 23 | Polars.col("a").struct 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /test/support/data.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ankane/ruby-polars/97b2c7128cb3b06b25f5a4daa679446bc550ea41/test/support/data.arrow -------------------------------------------------------------------------------- /test/support/data.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ankane/ruby-polars/97b2c7128cb3b06b25f5a4daa679446bc550ea41/test/support/data.avro -------------------------------------------------------------------------------- /test/support/data.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 1,one 3 | 2,two 4 | 3,"three" 5 | -------------------------------------------------------------------------------- /test/support/data.json: -------------------------------------------------------------------------------- 1 | [{"a":1,"b":"one"},{"a":2,"b":"two"},{"a":3,"b":"three"}] -------------------------------------------------------------------------------- /test/support/data.ndjson: -------------------------------------------------------------------------------- 1 | {"a":1,"b":"one"} 2 | {"a":2,"b":"two"} 3 | {"a":3,"b":"three"} 4 | -------------------------------------------------------------------------------- /test/support/data.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ankane/ruby-polars/97b2c7128cb3b06b25f5a4daa679446bc550ea41/test/support/data.parquet -------------------------------------------------------------------------------- /test/support/data2.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 4,four 3 | 5,five 4 | -------------------------------------------------------------------------------- /test/support/delta/_delta_log/00000000000000000000.json: -------------------------------------------------------------------------------- 1 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 2 | {"metaData":{"id":"3ccdc12c-f920-4c15-aa38-b9e723393c01","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1733266607579,"configuration":{}}} 3 | {"add":{"path":"part-00001-eeffaeba-16eb-4d8b-bb8f-654bfb8e823d-c000.snappy.parquet","partitionValues":{},"size":779,"modificationTime":1733266607580,"dataChange":true,"stats":"{\"numRecords\":3,\"minValues\":{\"b\":\"one\",\"a\":1},\"maxValues\":{\"b\":\"two\",\"a\":3},\"nullCount\":{\"b\":0,\"a\":0}}","tags":null,"deletionVector":null,"baseRowId":null,"defaultRowCommitVersion":null,"clusteringProvider":null}} 4 | {"commitInfo":{"timestamp":1733266607580,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists"},"clientVersion":"delta-rs.0.22.2","operationMetrics":{"execution_time_ms":0,"num_added_files":1,"num_added_rows":3,"num_partitions":0,"num_removed_files":0}}} -------------------------------------------------------------------------------- /test/support/delta/part-00001-eeffaeba-16eb-4d8b-bb8f-654bfb8e823d-c000.snappy.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ankane/ruby-polars/97b2c7128cb3b06b25f5a4daa679446bc550ea41/test/support/delta/part-00001-eeffaeba-16eb-4d8b-bb8f-654bfb8e823d-c000.snappy.parquet -------------------------------------------------------------------------------- /test/support/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 1.0,2.0,3.0,4.0,setosa 3 | -------------------------------------------------------------------------------- /test/support/types.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ankane/ruby-polars/97b2c7128cb3b06b25f5a4daa679446bc550ea41/test/support/types.parquet -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "bundler/setup" 2 | Bundler.require(:default) 3 | require "minitest/autorun" 4 | require "active_record" 5 | 6 | logger = ActiveSupport::Logger.new(ENV["VERBOSE"] ? 
STDOUT : nil) 7 | 8 | ActiveRecord::Base.logger = logger 9 | ActiveRecord::Migration.verbose = ENV["VERBOSE"] 10 | 11 | case ENV["ADAPTER"] 12 | when "postgresql" 13 | ActiveRecord::Base.establish_connection adapter: "postgresql", database: "polars_ruby_test" 14 | when "mysql" 15 | ActiveRecord::Base.establish_connection adapter: "mysql2", database: "polars_ruby_test" 16 | when "trilogy" 17 | ActiveRecord::Base.establish_connection adapter: "trilogy", database: "polars_ruby_test", host: "127.0.0.1" 18 | else 19 | ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:" 20 | end 21 | 22 | if ActiveSupport::VERSION::STRING.to_f == 8.0 23 | ActiveSupport.to_time_preserves_timezone = :zone 24 | elsif ActiveSupport::VERSION::STRING.to_f == 7.2 25 | ActiveSupport.to_time_preserves_timezone = true 26 | end 27 | 28 | ActiveRecord::Schema.define do 29 | create_table :users, force: true do |t| 30 | t.string :name 31 | t.integer :number 32 | t.float :inexact 33 | t.boolean :active 34 | t.datetime :joined_at 35 | t.date :joined_on 36 | t.binary :bin 37 | t.decimal :dec, precision: 10, scale: 3 38 | t.text :txt 39 | t.time :joined_time 40 | if ENV["ADAPTER"] == "postgresql" 41 | t.column :settings, :jsonb 42 | else 43 | t.column :settings, :json 44 | end 45 | end 46 | end 47 | 48 | class User < ActiveRecord::Base 49 | end 50 | 51 | class Minitest::Test 52 | include Polars::Testing 53 | 54 | def assert_series(exp, act, dtype: nil, **options) 55 | assert_kind_of Polars::Series, act 56 | if exp.is_a?(Polars::Series) 57 | assert_series_equal(exp, act, **options) 58 | elsif exp.any? { |e| e.is_a?(Float) && e.nan? } 59 | assert exp.zip(act.to_a).all? { |e, a| e.nan? ? a.nan? 
: e == a } 60 | else 61 | assert_equal exp.to_a, act.to_a 62 | end 63 | assert_equal dtype, act.dtype if dtype 64 | end 65 | 66 | def assert_frame(exp, act, **options) 67 | exp = Polars::DataFrame.new(exp) if exp.is_a?(Hash) 68 | assert_frame_equal(exp, act, **options) 69 | end 70 | 71 | def assert_expr(act) 72 | assert_kind_of Polars::Expr, act 73 | end 74 | 75 | def temp_path 76 | require "securerandom" 77 | 78 | # TODO clean up 79 | File.join(Dir.tmpdir, SecureRandom.alphanumeric(20)) 80 | end 81 | 82 | def in_temp_dir 83 | Dir.mktmpdir do |dir| 84 | Dir.chdir(dir) do 85 | yield 86 | end 87 | end 88 | end 89 | 90 | def cloud? 91 | !cloud_prefix.nil? 92 | end 93 | 94 | def cloud_prefix 95 | ENV["CLOUD_PREFIX"] 96 | end 97 | 98 | def cloud_file(filename) 99 | "#{cloud_prefix}/#{filename}" 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /test/testing_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class TestingTest < Minitest::Test 4 | def test_assert_frame_equal 5 | df = Polars::DataFrame.new({"a" => [1, 2, 3]}) 6 | assert_frame_equal df, df 7 | end 8 | 9 | def test_assert_frame_equal_different_dtype 10 | df = Polars::DataFrame.new({"a" => [1, 2, 3]}) 11 | df2 = Polars::DataFrame.new({"a" => [1.0, 2.0, 3.0]}) 12 | error = assert_raises(Polars::AssertionError) do 13 | assert_frame_equal df, df2 14 | end 15 | assert_match "DataFrames are different (dtypes do not match)", error.message 16 | assert_frame_equal df, df2, check_dtype: false 17 | end 18 | 19 | def test_assert_frame_equal_different_columns 20 | df = Polars::DataFrame.new({"a" => [1, 2, 3]}) 21 | df2 = Polars::DataFrame.new({"b" => [1, 2, 3]}) 22 | error = assert_raises(Polars::AssertionError) do 23 | assert_frame_equal df, df2 24 | end 25 | assert_equal "columns [\"b\"] in left DataFrames, but not in right", error.message 26 | end 27 | 28 | def 
test_assert_frame_not_equal 29 | df = Polars::DataFrame.new({"a" => [1, 2, 3]}) 30 | error = assert_raises(Polars::AssertionError) do 31 | assert_frame_not_equal df, df 32 | end 33 | assert_equal "frames are equal", error.message 34 | end 35 | 36 | def test_assert_series_equal 37 | s = Polars::Series.new([1, 2, 3]) 38 | assert_series_equal s, s 39 | end 40 | 41 | def test_assert_series_not_equal 42 | s = Polars::Series.new([1, 2, 3]) 43 | error = assert_raises(Polars::AssertionError) do 44 | assert_series_not_equal s, s 45 | end 46 | assert_equal "Series are equal", error.message 47 | end 48 | end 49 | --------------------------------------------------------------------------------