├── rust-toolchain ├── tests ├── benchmarks │ ├── __init__.py │ └── test_downsamplers.py ├── requirements-linting.txt ├── requirements.txt ├── test_config.py ├── test_rust_mods.py ├── test_algos_python_compliance.py └── test_tsdownsample.py ├── tsdownsample ├── py.typed ├── _python │ ├── __init__.py │ └── downsamplers.py ├── _rust │ └── __init__.py ├── __init__.py ├── downsamplers.py └── downsampling_interface.py ├── notebooks └── requirements.txt ├── .github ├── FUNDING.yml └── workflows │ ├── codspeed.yml │ ├── codeql.yml │ ├── ci-downsample_rs.yml │ └── ci-tsdownsample.yml ├── downsample_rs ├── README.md ├── dev_utils │ ├── src │ │ ├── lib.rs │ │ ├── config.rs │ │ └── utils.rs │ └── Cargo.toml ├── src │ ├── types.rs │ ├── lib.rs │ ├── helpers.rs │ ├── lttb.rs │ ├── minmaxlttb.rs │ ├── searchsorted.rs │ ├── minmax.rs │ └── m4.rs ├── Cargo.toml ├── LICENSE └── benches │ ├── bench_lttb.rs │ ├── bench_m4.rs │ ├── bench_minmax.rs │ ├── bench_minmaxlttb.rs │ └── results ├── Cargo.toml ├── LICENSE ├── Makefile ├── pyproject.toml ├── .gitignore ├── CONTRIBUTING.md ├── README.md └── src └── lib.rs /rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly 2 | -------------------------------------------------------------------------------- /tests/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tsdownsample/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tsdownsample/_python/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/requirements-linting.txt: -------------------------------------------------------------------------------- 1 | black 2 | ruff 3 | mypy 4 | -------------------------------------------------------------------------------- /notebooks/requirements.txt: -------------------------------------------------------------------------------- 1 | tsdownsample 2 | numpy 3 | pandas -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | pytest-benchmark 4 | -------------------------------------------------------------------------------- /tsdownsample/_rust/__init__.py: -------------------------------------------------------------------------------- 1 | # In this folder the compiled rust code should be placed. 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [jvdd, jonasvdd] 4 | -------------------------------------------------------------------------------- /downsample_rs/README.md: -------------------------------------------------------------------------------- 1 | # downsample_rs 2 | 3 | Implementation of (time series) downsampling algorithms in rust. 
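
The crate exposes one module per algorithm (`minmax`, `m4`, `lttb`, `minmaxlttb`), each with a sequential and, for most algorithms, a parallel variant. A minimal usage sketch is shown below; the functions are assumed here to return the indices of the selected samples, mirroring what the Python bindings expose:

```rust
use downsample_rs::lttb;

fn main() {
    // Illustrative input: an integer x-range and a smooth f32 signal as y.
    let x: Vec<i32> = (0..10_000).collect();
    let y: Vec<f32> = x.iter().map(|&i| (i as f32 * 0.01).sin()).collect();
    // Downsample to ~200 representative samples with LTTB.
    let _selected = lttb::lttb_with_x(x.as_slice(), y.as_slice(), 200);
}
```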
4 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod config;
2 | pub use config::*;
3 | pub mod utils;
4 | pub use utils::*;
5 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/src/config.rs:
--------------------------------------------------------------------------------
1 | // pub const ARRAY_LENGTH_SHORT: usize = 512;
2 | pub const ARRAY_LENGTH_SHORT: usize = 1_024;
3 | // pub const ARRAY_LENGTH_LONG: usize = 512 * 10;
4 | pub const ARRAY_LENGTH_LONG: usize = 102_400;
5 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "dev_utils"
3 | version = "0.1.1"
4 | authors = ["Jeroen Van Der Donckt"]
5 | edition = "2021"
6 | description = "Shared utilities for development (tests & benchmarks)"
7 | 
8 | [dependencies]
9 | rand = { version = "0.7.2", default-features = false }
10 | rand_distr = { version = "0.2.2", default-features = false }
11 | 
--------------------------------------------------------------------------------
/downsample_rs/src/types.rs:
--------------------------------------------------------------------------------
1 | use std::ops::{Add, Div, Mul, Sub};
2 | 
3 | pub trait Num:
4 |     Copy
5 |     + PartialOrd
6 |     + Add<Output = Self>
7 |     + Sub<Output = Self>
8 |     + Mul<Output = Self>
9 |     + Div<Output = Self>
10 | {
11 | }
12 | 
13 | // Implement the trait for all types that satisfy the trait bounds
14 | impl<T> Num for T where
15 |     T: Copy + PartialOrd + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T>
16 | {
17 | }
18 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "tsdownsample" # Same name as the Python package
3 | version = "0.1.0"
4 | edition = "2021"
5 | authors = ["Jeroen Van Der Donckt"]
6 | description = "Python bindings for time series downsampling algorithms"
7 | repository = "https://github.com/predict-idlab/tsdownsample"
8 | license = "MIT"
9 | 
10 | [dependencies]
11 | downsample_rs = { path = "downsample_rs", features = ["half"]}
12 | pyo3 = { version = "0.26", features = ["extension-module"] }
13 | numpy = { version = "0.26", features = ["half"] }
14 | half = { version = "2.3.1", default-features = false }
15 | paste = { version = "1.0.14", default-features = false }
16 | 
17 | [lib]
18 | name = "tsdownsample"
19 | crate-type = ["cdylib"]
20 | 
--------------------------------------------------------------------------------
/tsdownsample/__init__.py:
--------------------------------------------------------------------------------
1 | """tsdownsample: high performance downsampling of time series data for visualization."""
2 | 
3 | from .downsamplers import (
4 |     EveryNthDownsampler,
5 |     LTTBDownsampler,
6 |     M4Downsampler,
7 |     MinMaxDownsampler,
8 |     MinMaxLTTBDownsampler,
9 |     NaNM4Downsampler,
10 |     NaNMinMaxDownsampler,
11 |     NaNMinMaxLTTBDownsampler,
12 | )
13 | 
14 | __version__ = "0.1.4.1"
15 | __author__ = "Jeroen Van Der Donckt"
16 | 
17 | __all__ = [
18 |     "EveryNthDownsampler",
19 |     "MinMaxDownsampler",
20 |     "M4Downsampler",
21 |     "LTTBDownsampler",
22 |     "MinMaxLTTBDownsampler",
23 |     "NaNMinMaxDownsampler",
24 |     "NaNM4Downsampler",
25 |     "NaNMinMaxLTTBDownsampler",
26 | ]
27 | 
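
# Usage sketch (illustrative): the downsamplers return an array of indices into the
# passed data, which can then be used to slice the original series. The sizes, dtype
# and n_out below are arbitrary example values.
#
#   import numpy as np
#   from tsdownsample import MinMaxLTTBDownsampler
#
#   y = np.random.randn(1_000_000).astype(np.float32)
#   s_ds = MinMaxLTTBDownsampler().downsample(y, n_out=1_000)  # -> indices into y
#   y_ds = y[s_ds]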
-------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | # Store some global configuration for tests 2 | 3 | import numpy as np 4 | 5 | _core_supported_dtypes = [ 6 | np.float32, 7 | np.float64, 8 | np.int16, 9 | np.int32, 10 | np.int64, 11 | np.uint16, 12 | np.uint32, 13 | np.uint64, 14 | np.datetime64, 15 | np.timedelta64, 16 | ] 17 | 18 | supported_dtypes_x = _core_supported_dtypes 19 | supported_dtypes_y = _core_supported_dtypes + [np.float16, np.int8, np.uint8, np.bool_] 20 | 21 | _core_rust_primitive_types = ["f32", "f64", "i16", "i32", "i64", "u16", "u32", "u64"] 22 | 23 | rust_primitive_types_x = _core_rust_primitive_types 24 | rust_primitive_types_y = _core_rust_primitive_types + ["f16", "i8", "u8"] 25 | rust_primitive_types_y_nan = ["f16", "f32", "f64"] 26 | -------------------------------------------------------------------------------- /downsample_rs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "downsample_rs" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Jeroen Van Der Donckt"] 6 | description = "Downsample time series data" 7 | license = "MIT" 8 | 9 | [dependencies] 10 | # TODO: perhaps use polars? 11 | argminmax = { version = "0.6.1", features = ["half"] } 12 | half = { version = "2.3.1", default-features = false , features=["num-traits"], optional = true} 13 | num-traits = { version = "0.2.17", default-features = false } 14 | once_cell = "1" 15 | rayon = { version = "1.8.0", default-features = false } 16 | 17 | [dev-dependencies] 18 | rstest = { version = "0.18.2", default-features = false } 19 | rstest_reuse = { version = "0.6", default-features = false } 20 | criterion = "0.5.1" 21 | dev_utils = { path = "dev_utils" } 22 | 23 | [[bench]] 24 | name = "bench_m4" 25 | harness = false 26 | 27 | [[bench]] 28 | name = "bench_minmax" 29 | harness = false 30 | 31 | [[bench]] 32 | name = "bench_lttb" 33 | harness = false 34 | 35 | [[bench]] 36 | name = "bench_minmaxlttb" 37 | harness = false 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Jeroen Van Der Donckt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/downsample_rs/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Jeroen Van Der Donckt
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/downsample_rs/dev_utils/src/utils.rs:
--------------------------------------------------------------------------------
1 | use std::ops::{Add, Sub};
2 | 
3 | use rand::{thread_rng, Rng};
4 | use rand_distr::Uniform;
5 | 
6 | // random array that samples between min and max of T
7 | pub fn get_random_array<T>(n: usize, min_value: T, max_value: T) -> Vec<T>
8 | where
9 |     T: Copy + rand::distributions::uniform::SampleUniform,
10 | {
11 |     let rng = thread_rng();
12 |     let uni = Uniform::new_inclusive(min_value, max_value);
13 |     let arr: Vec<T> = rng.sample_iter(uni).take(n).collect();
14 |     arr
15 | }
16 | 
17 | // worst case array that alternates between increasing max and decreasing min values
18 | pub fn get_worst_case_array<T>(n: usize, step: T) -> Vec<T>
19 | where
20 |     T: Copy + Default + Sub<Output = T> + Add<Output = T>,
21 | {
22 |     let mut arr: Vec<T> = Vec::with_capacity(n);
23 |     let mut min_value: T = Default::default();
24 |     let mut max_value: T = Default::default();
25 |     for i in 0..n {
26 |         if i % 2 == 0 {
27 |             arr.push(min_value);
28 |             min_value = min_value - step;
29 |         } else {
30 |             arr.push(max_value);
31 |             max_value = max_value + step;
32 |         }
33 |     }
34 |     arr
35 | }
36 | 
--------------------------------------------------------------------------------
/downsample_rs/src/lib.rs:
--------------------------------------------------------------------------------
1 | // It is necessary to import this at the root of the crate
2 | // See: https://github.com/la10736/rstest/tree/master/rstest_reuse#use-rstest_resuse-at-the-top-of-your-crate
3 | #[cfg(test)]
4 | use rstest_reuse;
5 | 
6 | pub mod minmax;
7 | pub use minmax::*;
8 | pub mod lttb;
9 | pub use lttb::*;
10 | pub mod minmaxlttb;
11 | pub use minmaxlttb::*;
12 | pub mod m4;
13 | pub use m4::*;
14 | pub(crate) mod helpers;
15 | pub(crate) mod searchsorted;
16 | pub(crate) mod types;
17 | 
18 | use once_cell::sync::Lazy;
19 | use rayon::{ThreadPool, ThreadPoolBuilder};
20 | 
21 | // Inspired by: https://github.com/pola-rs/polars/blob/9a69062aa0beb2a1bc5d57294cac49961fc91058/crates/polars-core/src/lib.rs#L49
22 | pub static POOL: Lazy<ThreadPool> = Lazy::new(|| {
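    // Thread count for the shared rayon pool: use the TSDOWNSAMPLE_MAX_THREADS
    // environment variable when it is set, otherwise fall back to the number of
    // available cores (defaulting to 1 if that cannot be determined).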
23 |     ThreadPoolBuilder::new()
24 |         .num_threads(
25 |             std::env::var("TSDOWNSAMPLE_MAX_THREADS")
26 |                 .map(|s| s.parse::<usize>().expect("integer"))
27 |                 .unwrap_or_else(|_| {
28 |                     std::thread::available_parallelism()
29 |                         .unwrap_or(std::num::NonZeroUsize::new(1).unwrap())
30 |                         .get()
31 |                 }),
32 |         )
33 |         .build()
34 |         .expect("could not spawn threads")
35 | });
36 | 
--------------------------------------------------------------------------------
/.github/workflows/codspeed.yml:
--------------------------------------------------------------------------------
1 | name: CodSpeed Benchmarks
2 | 
3 | on:
4 |   pull_request:
5 |   push:
6 |     branches:
7 |       - main
8 |   # `workflow_dispatch` allows CodSpeed to trigger backtest
9 |   # performance analysis in order to generate initial data.
10 |   workflow_dispatch:
11 | 
12 | jobs:
13 |   Benchmarks:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |       - uses: actions/setup-python@v4
18 |         with:
19 |           python-version: '3.10'
20 | 
21 |       - name: Install Rust toolchain
22 |         uses: actions-rs/toolchain@v1
23 |         with:
24 |           profile: minimal
25 |           toolchain: nightly
26 |           components: clippy, rustfmt
27 |       - name: Setup Rust
28 |         run: |
29 |           rustup update nightly --no-self-update
30 |           rustup default nightly
31 |       - name: Cache rust
32 |         uses: Swatinem/rust-cache@v2
33 | 
34 |       - name: install develop version
35 |         run: make install
36 | 
37 |       - run: pip install -r tests/requirements.txt
38 |       - run: pip install pytest-codspeed
39 | 
40 |       - run: pip freeze
41 | 
42 |       # this is required so that pytest uses the installed package
43 |       # - run: rm tests/__init__.py
44 | 
45 |       - name: Run CodSpeed benchmarks
46 |         uses: CodSpeedHQ/action@v3
47 |         with:
48 |           run: pytest tests/benchmarks/ --codspeed
49 | 
--------------------------------------------------------------------------------
/downsample_rs/src/helpers.rs:
--------------------------------------------------------------------------------
1 | use num_traits::AsPrimitive;
2 | 
3 | use crate::types::Num;
4 | 
5 | // ------------ AVERAGE
6 | 
7 | // TODO: future work -> this can be optimized by using SIMD instructions (similar to the argminmax crate)
8 | // TODO: this implementation can overflow (but numpy does the same)
9 | 
10 | // This trait implements the average function for all types that this crate
11 | // supports. It is used in the lttb algorithm.
12 | // We intend to use the same implementation for all types as is used in the
13 | // numpy (Python) library (which uses add reduce):
14 | // - f64 & f32: use the data type to calculate the average
15 | // - f16: cast to f32 and calculate the average
16 | // - signed & unsigned integers: cast to f64 and calculate the average
17 | // Note: the only difference with the numpy implementation is that this
18 | // implementation always returns an f64, while numpy returns f32 for f32 and f16
19 | // (however the calculation is done in f32 - only the result is cast to f64).
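// (For example, averaging the i32 slice [1, 2, 4] accumulates 1.0 + 2.0 + 4.0 in f64
// and divides by the length 3, yielding 2.333..., not the truncated integer 2.)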
20 | // See more details: https://github.com/numpy/numpy/blob/8cec82012694571156e8d7696307c848a7603b4e/numpy/core/_methods.py#L164
21 | 
22 | pub trait Average {
23 |     fn average(&self) -> f64;
24 | }
25 | 
26 | impl<T> Average for [T]
27 | where
28 |     T: Num + AsPrimitive<f64>,
29 | {
30 |     fn average(&self) -> f64 {
31 |         self.iter().fold(0f64, |acc, &x| acc + x.as_()) as f64 / self.len() as f64
32 |     }
33 | }
34 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .DEFAULT_GOAL := all
2 | black = black tsdownsample tests
3 | 
4 | install:
5 | 	pip install .
6 | 
7 | .PHONY: install-dev-requirements
8 | install-dev-requirements:
9 | 	pip install -r tests/requirements.txt
10 | 	pip install -r tests/requirements-linting.txt
11 | 
12 | .PHONY: format
13 | format:
14 | 	ruff format tsdownsample tests
15 | 	$(black)
16 | 	cargo fmt
17 | 
18 | .PHONY: lint-python
19 | lint-python:
20 | 	ruff check tsdownsample tests
21 | 	$(black) --check --diff
22 | 
23 | .PHONY: lint-rust
24 | lint-rust:
25 | 	cargo fmt --version
26 | 	cargo fmt --all -- --check
27 | 	cargo clippy --version
28 | 	cargo clippy -- -D warnings -A incomplete_features -W clippy::dbg_macro -W clippy::print_stdout -A clippy::empty_line_after_doc_comments
29 | 
30 | .PHONY: lint
31 | lint: lint-python lint-rust
32 | 
33 | .PHONY: mypy
34 | mypy:
35 | 	mypy tsdownsample
36 | 
37 | 
38 | .PHONY: test
39 | test:
40 | 	pytest --benchmark-skip --cov=tsdownsample --cov-report=term-missing --cov-report=html --cov-report=xml
41 | 
42 | .PHONY: bench
43 | bench:
44 | 	pytest --benchmark-only --benchmark-max-time=5
45 | 
46 | 
47 | .PHONY: all
48 | all: lint mypy test
49 | 
50 | .PHONY: clean
51 | clean:
52 | 	rm -rf `find . -name __pycache__`
53 | 	rm -f `find . -type f -name '*.py[co]' `
54 | 	rm -f `find . -type f -name '*~' `
55 | 	rm -f `find . -type f -name '.*~' `
56 | 	rm -f `find . -type f -name '*.cpython-*' `
57 | 	rm -rf dist
58 | 	rm -rf build
59 | 	rm -rf target
60 | 	rm -rf .cache
61 | 	rm -rf .pytest_cache
62 | 	rm -rf .mypy_cache
63 | 	rm -rf htmlcov
64 | 	rm -rf *.egg-info
65 | 	rm -rf .ruff*
66 | 	rm -f .coverage
67 | 	rm -f .coverage.*
68 | 	rm -rf build
69 | 	rm -f tsdownsample/*.so
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 | 
3 | on:
4 |   push:
5 |     branches: [ "main" ]
6 |   pull_request:
7 |     # The branches below must be a subset of the branches above
8 |     branches: [ "main" ]
9 |   schedule:
10 |     - cron: '00 00 * * 1'
11 | 
12 | jobs:
13 |   analyze:
14 |     name: Analyze
15 |     # Runner size impacts CodeQL analysis time. To learn more, please see:
16 |     #   - https://gh.io/recommended-hardware-resources-for-running-codeql
17 |     #   - https://gh.io/supported-runners-and-hardware-resources
18 |     #   - https://gh.io/using-larger-runners
19 |     # Consider using larger runners for possible analysis time improvements.
20 | runs-on: 'ubuntu-latest' 21 | timeout-minutes: 360 22 | permissions: 23 | actions: read 24 | contents: read 25 | security-events: write 26 | 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | language: [ 'python' ] 31 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] 32 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 33 | 34 | steps: 35 | - name: Checkout repository 36 | uses: actions/checkout@v3 37 | 38 | - name: Initialize CodeQL 39 | uses: github/codeql-action/init@v2 40 | with: 41 | languages: ${{ matrix.language }} 42 | # If you wish to specify custom queries, you can do so here or in a config file. 43 | # By default, queries listed here will override any specified in a config file. 44 | # Prefix the list here with "+" to use these queries and those in the config file. 45 | 46 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 47 | # queries: security-extended,security-and-quality 48 | 49 | - name: Perform CodeQL Analysis 50 | uses: github/codeql-action/analyze@v2 51 | with: 52 | category: "/language:${{matrix.language}}" 53 | -------------------------------------------------------------------------------- /downsample_rs/benches/bench_lttb.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::lttb as lttb_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | fn lttb_f32_random_array_long(c: &mut Criterion) { 7 | let n = config::ARRAY_LENGTH_LONG; 8 | let x = (0..n).map(|i| i as i32).collect::>(); 9 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 10 | c.bench_function("lttb_scalx_f32", |b| { 11 | b.iter(|| { 12 | lttb_mod::lttb_with_x( 13 | black_box(x.as_slice()), 14 | black_box(y.as_slice()), 15 | black_box(2_000), 16 | ) 17 | }) 18 | }); 19 | } 20 | fn lttb_f32_random_array_50m(c: &mut Criterion) { 21 | let n = 50_000_000; 22 | let x = (0..n).map(|i| i as i32).collect::>(); 23 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 24 | c.bench_function("lttb_scalx_50M_f32", |b| { 25 | b.iter(|| { 26 | lttb_mod::lttb_with_x( 27 | black_box(x.as_slice()), 28 | black_box(y.as_slice()), 29 | black_box(2_000), 30 | ) 31 | }) 32 | }); 33 | } 34 | 35 | fn lttb_without_x_f32_random_array_long(c: &mut Criterion) { 36 | let n = config::ARRAY_LENGTH_LONG; 37 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 38 | c.bench_function("lttb_scal_f32", |b| { 39 | b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000))) 40 | }); 41 | } 42 | fn lttb_without_x_f32_random_array_50m(c: &mut Criterion) { 43 | let n = 50_000_000; 44 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 45 | c.bench_function("lttb_scal_50M_f32", |b| { 46 | b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000))) 47 | }); 48 | } 49 | 50 | criterion_group!( 51 | benches, 52 | // lttb_f32_random_array_long, 53 | lttb_f32_random_array_50m, 54 | // lttb_without_x_f32_random_array_long, 55 | lttb_without_x_f32_random_array_50m, 56 | ); 57 | criterion_main!(benches); 58 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | 
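# Build backend: maturin compiles the Rust crate in src/ and places the extension
# module under tsdownsample/_rust (see [tool.maturin] below); `make install` wraps this.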
[build-system] 2 | requires = ["maturin>=1.1,<2.0"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "tsdownsample" 7 | description = "Time series downsampling in rust" 8 | version = "0.1.4.1" 9 | requires-python = ">=3.8" 10 | dependencies = ["numpy"] 11 | authors = [{name = "Jeroen Van Der Donckt"}] 12 | readme = "README.md" 13 | license = {text = "MIT"} 14 | keywords = ["time series", "downsampling", "rust", "data science", "visualization"] 15 | classifiers = [ 16 | 'Intended Audience :: Developers', 17 | 'License :: OSI Approved :: MIT License', 18 | 'Programming Language :: Python :: 3', 19 | 'Programming Language :: Python :: 3.8', 20 | 'Programming Language :: Python :: 3.9', 21 | 'Programming Language :: Python :: 3.10', 22 | 'Programming Language :: Python :: 3.11', 23 | 'Programming Language :: Python :: 3.12', 24 | 'Programming Language :: Python :: 3.13', 25 | 'Programming Language :: Python :: 3.14', 26 | 'Operating System :: POSIX', 27 | 'Operating System :: MacOS :: MacOS X', 28 | 'Operating System :: Microsoft :: Windows' 29 | ] 30 | 31 | [project.urls] 32 | Homepage = "https://github.com/predict-idlab/tsdownsample" 33 | Repository = "https://github.com/predict-idlab/tsdownsample" 34 | 35 | # Build Python bindings for rust 36 | [tool.maturin] 37 | bindings = "pyo3" 38 | module-name = "tsdownsample._rust._tsdownsample_rs" # The path to place the compiled Rust module 39 | # See: https://www.maturin.rs/project_layout.html#import-rust-as-a-submodule-of-your-project 40 | 41 | # Linting 42 | [tool.ruff] 43 | line-length = 88 44 | 45 | [tool.ruff.lint] 46 | select = ["E", "F", "I"] 47 | extend-select = ["Q"] 48 | ignore = ["E402", "F403"] 49 | 50 | # Formatting 51 | [tool.black] 52 | color = true 53 | line-length = 88 54 | skip-string-normalization = true 55 | skip-magic-trailing-comma = true 56 | 57 | # Static typing 58 | [tool.mypy] 59 | follow_imports = "normal" 60 | strict_optional = true 61 | warn_redundant_casts = true 62 | warn_unused_ignores = true 63 | check_untyped_defs = true 64 | no_implicit_reexport = true 65 | disallow_untyped_defs = false 66 | disallow_any_generics = false 67 | ignore_missing_imports = true 68 | -------------------------------------------------------------------------------- /tests/test_rust_mods.py: -------------------------------------------------------------------------------- 1 | import tsdownsample._rust._tsdownsample_rs as tsds_rs 2 | from test_config import ( 3 | rust_primitive_types_x, 4 | rust_primitive_types_y, 5 | rust_primitive_types_y_nan, 6 | ) 7 | 8 | 9 | def _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl: bool): 10 | # Without x 11 | for sub_mod in sub_mods: 12 | assert hasattr(mod, sub_mod) 13 | m = getattr(mod, sub_mod) 14 | for ty in rust_primitive_types_y: 15 | assert hasattr(m, f"downsample_{ty}") 16 | # With x 17 | if not has_x_impl: 18 | return 19 | for sub_mod in sub_mods: 20 | assert hasattr(mod, sub_mod) 21 | m = getattr(mod, sub_mod) 22 | for tx in rust_primitive_types_x: 23 | for ty in rust_primitive_types_y: 24 | assert hasattr(m, f"downsample_{tx}_{ty}") 25 | 26 | 27 | def _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl: bool): 28 | # without x 29 | for sub_mod in sub_mods: 30 | assert hasattr(mod, sub_mod) 31 | m = getattr(mod, sub_mod) 32 | for ty in rust_primitive_types_y_nan: 33 | assert hasattr(m, f"downsample_nan_{ty}") 34 | 35 | # with x 36 | if not has_x_impl: 37 | return 38 | for sub_mod in sub_mods: 39 | assert hasattr(mod, sub_mod) 40 | m = getattr(mod, sub_mod) 41 | for tx in 
rust_primitive_types_x: 42 | for ty in rust_primitive_types_y_nan: 43 | assert hasattr(m, f"downsample_{tx}_{ty}") 44 | 45 | 46 | def test_minmax_rust_mod_correctly_build(): 47 | mod = tsds_rs.minmax 48 | sub_mods = ["sequential", "parallel"] 49 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 50 | _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) 51 | 52 | 53 | def test_m4_rust_mod_correctly_build(): 54 | mod = tsds_rs.m4 55 | sub_mods = ["sequential", "parallel"] 56 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 57 | _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) 58 | 59 | 60 | def test_lttb_rust_mod_correctly_build(): 61 | mod = tsds_rs.lttb 62 | sub_mods = ["sequential"] 63 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 64 | 65 | 66 | def test_minmaxlttb_rust_mod_correctly_build(): 67 | mod = tsds_rs.minmaxlttb 68 | sub_mods = ["sequential", "parallel"] 69 | _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) 70 | _test_rust_nan_mod_correctly_build(mod, sub_mods, has_x_impl=True) 71 | -------------------------------------------------------------------------------- /tests/test_algos_python_compliance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from tsdownsample import ( 5 | LTTBDownsampler, 6 | M4Downsampler, 7 | MinMaxDownsampler, 8 | NaNM4Downsampler, 9 | NaNMinMaxDownsampler, 10 | ) 11 | from tsdownsample._python.downsamplers import ( 12 | LTTB_py, 13 | M4_py, 14 | MinMax_py, 15 | NaNM4_py, 16 | NaNMinMax_py, 17 | ) 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "rust_python_pair", 22 | [ 23 | (MinMaxDownsampler(), MinMax_py()), 24 | (M4Downsampler(), M4_py()), 25 | (LTTBDownsampler(), LTTB_py()), 26 | # Include NaN downsamplers 27 | (NaNMinMaxDownsampler(), NaNMinMax_py()), 28 | (NaNM4Downsampler(), NaNM4_py()), 29 | ], 30 | ) 31 | @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) 32 | @pytest.mark.parametrize("n_out", [100, 200, 252]) 33 | def test_resampler_accordance(rust_python_pair, n, n_out): 34 | rust_downsampler, python_downsampler = rust_python_pair 35 | x = np.arange(n) 36 | y = np.random.randn(n) 37 | # Without x passed to the rust downsampler 38 | assert np.allclose( 39 | rust_downsampler.downsample(y, n_out=n_out), 40 | python_downsampler.downsample(x, y, n_out=n_out), 41 | ) 42 | # With x passed to the rust downsampler 43 | assert np.allclose( 44 | rust_downsampler.downsample(x, y, n_out=n_out), 45 | python_downsampler.downsample(x, y, n_out=n_out), 46 | ) 47 | 48 | 49 | @pytest.mark.parametrize( 50 | "rust_python_pair", 51 | [(NaNMinMaxDownsampler(), NaNMinMax_py()), (NaNM4Downsampler(), NaNM4_py())], 52 | ) 53 | @pytest.mark.parametrize("n", [10_000, 10_032, 20_321, 23_489]) 54 | @pytest.mark.parametrize("n_random_nans", [100, 200, 500, 2000, 5000]) 55 | @pytest.mark.parametrize("n_out", [100, 200, 252]) 56 | def test_nan_resampler_accordance(rust_python_pair, n, n_random_nans, n_out): 57 | rust_downsampler, python_downsampler = rust_python_pair 58 | x = np.arange(n) 59 | y = np.random.randn(n) 60 | y[np.random.choice(y.size, n_random_nans, replace=False)] = np.nan 61 | # Without x passed to the rust downsampler 62 | rust_result = rust_downsampler.downsample(y, n_out=n_out) 63 | python_result = python_downsampler.downsample(x, y, n_out=n_out) 64 | assert np.allclose(rust_result, python_result) 65 | # With x passed to the rust downsampler 66 | assert np.allclose( 67 | 
rust_downsampler.downsample(x, y, n_out=n_out), 68 | python_downsampler.downsample(x, y, n_out=n_out), 69 | ) 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ruff* 2 | .vscode/* 3 | venv/ 4 | TODO.md 5 | main.rs 6 | 7 | ### ----- rust gitignore 8 | 9 | # Generated by Cargo 10 | # will have compiled files and executables 11 | debug/ 12 | target/ 13 | 14 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 15 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 16 | Cargo.lock 17 | 18 | # These are backup files generated by rustfmt 19 | **/*.rs.bk 20 | 21 | # MSVC Windows builds of rustc generate these, which store debugging information 22 | *.pdb 23 | 24 | 25 | ### ----- Python gitignore 26 | 27 | # Byte-compiled / optimized / DLL files 28 | __pycache__/ 29 | *.py[cod] 30 | *$py.class 31 | 32 | # C extensions 33 | *.so 34 | 35 | # Distribution / packaging 36 | .Python 37 | build/ 38 | develop-eggs/ 39 | dist/ 40 | downloads/ 41 | eggs/ 42 | .eggs/ 43 | lib/ 44 | lib64/ 45 | parts/ 46 | sdist/ 47 | var/ 48 | wheels/ 49 | pip-wheel-metadata/ 50 | share/python-wheels/ 51 | *.egg-info/ 52 | .installed.cfg 53 | *.egg 54 | MANIFEST 55 | 56 | # PyInstaller 57 | # Usually these files are written by a python script from a template 58 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 59 | *.manifest 60 | *.spec 61 | 62 | # Installer logs 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .nox/ 70 | .coverage 71 | .coverage.* 72 | .cache 73 | nosetests.xml 74 | coverage.xml 75 | *.cover 76 | *.py,cover 77 | .hypothesis/ 78 | .pytest_cache/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | db.sqlite3-journal 89 | 90 | # Flask stuff: 91 | instance/ 92 | .webassets-cache 93 | 94 | # Scrapy stuff: 95 | .scrapy 96 | 97 | # Sphinx documentation 98 | docs/_build/ 99 | 100 | # PyBuilder 101 | target/ 102 | 103 | # Jupyter Notebook 104 | .ipynb_checkpoints 105 | 106 | # IPython 107 | profile_default/ 108 | ipython_config.py 109 | 110 | # pyenv 111 | .python-version 112 | 113 | # pipenv 114 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 115 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 116 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 117 | # install all needed dependencies. 118 | #Pipfile.lock 119 | 120 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | -------------------------------------------------------------------------------- /downsample_rs/benches/bench_m4.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::m4 as m4_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | fn m4_f32_random_array_long_single_core(c: &mut Criterion) { 7 | let n = config::ARRAY_LENGTH_LONG; 8 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 9 | c.bench_function("m4_f32", |b| { 10 | b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) 11 | }); 12 | } 13 | 14 | fn m4_f32_random_array_long_multi_core(c: &mut Criterion) { 15 | let n = config::ARRAY_LENGTH_LONG; 16 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 17 | c.bench_function("m4_p_f32", |b| { 18 | b.iter(|| m4_mod::m4_without_x_parallel(black_box(data.as_slice()), black_box(2_000))) 19 | }); 20 | } 21 | 22 | fn m4_f32_random_array_50M_single_core(c: &mut Criterion) { 23 | let n = 50_000_000; 24 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 25 | let x = (0..n).map(|i| i as i32).collect::>(); 26 | c.bench_function("m4_50M_f32", |b| { 27 | b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) 28 | }); 29 | c.bench_function("m4_x_50M_f32", |b| { 30 | b.iter(|| { 31 | m4_mod::m4_with_x( 32 | black_box(x.as_slice()), 33 | black_box(data.as_slice()), 34 | black_box(2_000), 35 | ) 36 | }) 37 | }); 38 | } 39 | 40 | fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) { 41 | let n = 50_000_000; 42 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 43 | let x = (0..n).map(|i| i as i32).collect::>(); 44 | c.bench_function("m4_p_50M_f32", |b| { 45 | b.iter(|| m4_mod::m4_without_x_parallel(black_box(data.as_slice()), black_box(2_000))) 46 | }); 47 | c.bench_function("m4_x_p_50M_f32", |b| { 48 | b.iter(|| { 49 | m4_mod::m4_with_x_parallel( 50 | black_box(x.as_slice()), 51 | black_box(data.as_slice()), 52 | black_box(2_000), 53 | ) 54 | }) 55 | }); 56 | } 57 | 58 | // fn m4_f32_worst_case_array_long(c: &mut Criterion) { 59 | // let n = config::ARRAY_LENGTH_LONG; 60 | // let data = utils::get_worst_case_array::(n, 1.0); 61 | // c.bench_function("overlap_worst_long_f32", |b| { 62 | // b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000))) 63 | // }); 64 | // c.bench_function("simple_worst_long_f32", |b| { 65 | // b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000))) 66 | // }); 67 | // c.bench_function("simd_worst_long_f32", |b| { 68 | // b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000))) 69 | // }); 70 | // } 71 | 72 | criterion_group!( 73 | benches, 74 | // m4_f32_random_array_long_single_core, 75 | // m4_f32_random_array_long_multi_core, 76 | m4_f32_random_array_50M_single_core, 77 | 
m4_f32_random_array_50M_multi_core,
78 |     // m4_f32_worst_case_array_long,
79 | );
80 | criterion_main!(benches);
81 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to tsdownsample
2 | 
3 | Welcome! We're happy to have you here. Thank you in advance for your contribution to tsdownsample.
4 | 
5 | ## The basics
6 | 
7 | tsdownsample welcomes contributions in the form of Pull Requests. For small changes (e.g., bug fixes), feel free to submit a PR. For larger changes (e.g., new functionality, major refactoring), consider submitting an [Issue](https://github.com/predict-idlab/tsdownsample/issues) outlining your proposed change.
8 | 
9 | ### Prerequisites
10 | 
11 | The core of tsdownsample is written in Rust, so you'll need to install the [Rust toolchain](https://www.rust-lang.org/tools/install) for development.
12 | 
13 | This project uses the nightly version of Rust. You can install it with:
14 | 
15 | ```bash
16 | rustup install nightly
17 | ```
18 | 
19 | and then set it as the default toolchain with:
20 | 
21 | ```bash
22 | rustup default nightly
23 | ```
24 | 
25 | ### Installing (locally)
26 | 
27 | To install the package locally, run the following command in the root directory of the project:
28 | 
29 | ```bash
30 | make install
31 | ```
32 | 
33 | ### tsdownsample
34 | 
35 | The structure of the tsdownsample project is as follows:
36 | 
37 | ```bash
38 | tsdownsample
39 | ├── Cargo.toml
40 | ├── README.md
41 | ├── src
42 | │   ├── lib.rs        # Python bindings for Rust library
43 | ├── tsdownsample      # The Python package
44 | ├── downsample_rs     # Rust library containing the actual implementation
45 | ├── tests             # Tests for the Python package
46 | ```
47 | 
48 | The Rust library is located in the `downsample_rs` directory. The Python package is located in the `tsdownsample` directory. The `src/lib.rs` file contains the Python bindings for the Rust library.
49 | 
50 | Under the hood, most downsampling algorithms rely heavily on [argminmax](https://github.com/jvdd/argminmax) - a SIMD-accelerated library for finding the indices of the minimum and maximum values in an array. If you want to improve the performance of the library, you could also take a look at the `argminmax` library.
51 | 
52 | ### Testing
53 | 
54 | Changes to the downsample_rs library can be tested with:
55 | 
56 | ```bash
57 | cd downsample_rs
58 | cargo test
59 | ```
60 | 
61 | Changes to the Python package can be tested using the [`Makefile`](Makefile) in the root directory of the project:
62 | 
63 | *Make sure you have the test dependencies installed:*
64 | 
65 | ```bash
66 | pip install -r tests/requirements.txt          # Install test dependencies
67 | pip install -r tests/requirements-linting.txt  # Install linting dependencies
68 | ```
69 | 
70 | To run the tests:
71 | ```bash
72 | make test
73 | ```
74 | 
75 | To run the linting checks:
76 | ```bash
77 | make lint
78 | ```
79 | 
80 | ### Formatting
81 | 
82 | We use [black](https://github.com/psf/black) and [ruff](https://github.com/astral-sh/ruff) to format the Python code.
83 | 84 | To format the code, run the following command (more details in the [Makefile](Makefile)): 85 | ```sh 86 | make format 87 | ``` 88 | 89 | *(make sure you have the test linting dependencies installed)* 90 | 91 | To format the Rust code, run the following command: 92 | ```sh 93 | cargo fmt 94 | ``` 95 | 96 | --- 97 | 98 | ## Improving the performance 99 | 100 | When a PR is submitted that improves the performance of the library, we would highly appreciate if the PR also includes a (verifiable) benchmark that shows the improvement. -------------------------------------------------------------------------------- /downsample_rs/benches/bench_minmax.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::minmax as minmax_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | fn minmax_f32_random_array_long_single_core(c: &mut Criterion) { 7 | let n = config::ARRAY_LENGTH_LONG; 8 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 9 | c.bench_function("minmax_f32", |b| { 10 | b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) 11 | }); 12 | } 13 | 14 | fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) { 15 | let n = config::ARRAY_LENGTH_LONG; 16 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 17 | c.bench_function("minmax_p_f32", |b| { 18 | b.iter(|| { 19 | minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(2_000)) 20 | }) 21 | }); 22 | } 23 | 24 | fn minmax_f32_random_array_50M_single_core(c: &mut Criterion) { 25 | let n = 50_000_000; 26 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 27 | let x = (0..n).map(|i| i as i32).collect::>(); 28 | c.bench_function("minmax_50M_f32", |b| { 29 | b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) 30 | }); 31 | c.bench_function("minmax_x_50M_f32", |b| { 32 | b.iter(|| { 33 | minmax_mod::min_max_with_x( 34 | black_box(x.as_slice()), 35 | black_box(data.as_slice()), 36 | black_box(2_000), 37 | ) 38 | }) 39 | }); 40 | 41 | // c.bench_function("minmax_50M_f32", |b| { 42 | // b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000))) 43 | // }); 44 | // c.bench_function("minmax_x_50M_f32", |b| { 45 | // b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) 46 | // }); 47 | } 48 | 49 | fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { 50 | let n = 50_000_000; 51 | let data = utils::get_random_array::(n, f32::MIN, f32::MAX); 52 | let x = (0..n).map(|i| i as i32).collect::>(); 53 | c.bench_function("minmax_p_50M_f32", |b| { 54 | b.iter(|| { 55 | minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(2_000)) 56 | }) 57 | }); 58 | c.bench_function("minmax_x_p_50M_f32", |b| { 59 | b.iter(|| { 60 | minmax_mod::min_max_with_x_parallel( 61 | black_box(x.as_slice()), 62 | black_box(data.as_slice()), 63 | black_box(2_000), 64 | ) 65 | }) 66 | }); 67 | 68 | // c.bench_function("minmax_p_50M_f32", |b| { 69 | // b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000))) 70 | // }); 71 | // c.bench_function("minmax_x_p_50M_f32", |b| { 72 | // b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) 73 | // }); 74 | } 75 | 76 | // fn minmax_f32_worst_case_array_long(c: &mut 
Criterion) { 77 | // let n = config::ARRAY_LENGTH_LONG; 78 | // let data = utils::get_worst_case_array::(n, 1.0); 79 | // c.bench_function("overlap_worst_long_f32", |b| { 80 | // b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000))) 81 | // }); 82 | // c.bench_function("simple_worst_long_f32", |b| { 83 | // b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000))) 84 | // }); 85 | // c.bench_function("simd_worst_long_f32", |b| { 86 | // b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000))) 87 | // }); 88 | // } 89 | 90 | criterion_group!( 91 | benches, 92 | // minmax_f32_random_array_long_single_core, 93 | // minmax_f32_random_array_long_multi_core, 94 | minmax_f32_random_array_50M_single_core, 95 | minmax_f32_random_array_50M_long_multi_core, 96 | // minmax_f32_worst_case_array_long, 97 | ); 98 | criterion_main!(benches); 99 | -------------------------------------------------------------------------------- /downsample_rs/benches/bench_minmaxlttb.rs: -------------------------------------------------------------------------------- 1 | use downsample_rs::minmaxlttb as minmaxlttb_mod; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use dev_utils::{config, utils}; 5 | 6 | const MINMAX_RATIO: usize = 30; 7 | 8 | fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) { 9 | let n = config::ARRAY_LENGTH_LONG; 10 | let x = (0..n).map(|i| i as i32).collect::>(); 11 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 12 | c.bench_function("mlttb_x_f32", |b| { 13 | b.iter(|| { 14 | minmaxlttb_mod::minmaxlttb_with_x( 15 | black_box(x.as_slice()), 16 | black_box(y.as_slice()), 17 | black_box(2_000), 18 | black_box(MINMAX_RATIO), 19 | ) 20 | }) 21 | }); 22 | } 23 | 24 | fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) { 25 | let n = config::ARRAY_LENGTH_LONG; 26 | let x = (0..n).map(|i| i as i32).collect::>(); 27 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 28 | c.bench_function("mlttb_x_p_f32", |b| { 29 | b.iter(|| { 30 | minmaxlttb_mod::minmaxlttb_with_x_parallel( 31 | black_box(x.as_slice()), 32 | black_box(y.as_slice()), 33 | black_box(2_000), 34 | black_box(MINMAX_RATIO), 35 | ) 36 | }) 37 | }); 38 | } 39 | 40 | fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) { 41 | let n = 50_000_000; 42 | let x = (0..n).map(|i| i as i32).collect::>(); 43 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 44 | c.bench_function("mlttb_x_50M_f32", |b| { 45 | b.iter(|| { 46 | minmaxlttb_mod::minmaxlttb_with_x( 47 | black_box(x.as_slice()), 48 | black_box(y.as_slice()), 49 | black_box(2_000), 50 | black_box(MINMAX_RATIO), 51 | ) 52 | }) 53 | }); 54 | } 55 | 56 | fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) { 57 | let n = 50_000_000; 58 | let x = (0..n).map(|i| i as i32).collect::>(); 59 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 60 | c.bench_function("mlttb_x_p_50M_f32", |b| { 61 | b.iter(|| { 62 | minmaxlttb_mod::minmaxlttb_with_x_parallel( 63 | black_box(x.as_slice()), 64 | black_box(y.as_slice()), 65 | black_box(2_000), 66 | black_box(MINMAX_RATIO), 67 | ) 68 | }) 69 | }); 70 | } 71 | 72 | fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) { 73 | let n = 50_000_000; 74 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 75 | c.bench_function("mlttb_50M_f32", |b| { 76 | b.iter(|| { 77 | minmaxlttb_mod::minmaxlttb_without_x( 78 | black_box(y.as_slice()), 
79 | black_box(2_000), 80 | black_box(MINMAX_RATIO), 81 | ) 82 | }) 83 | }); 84 | } 85 | 86 | fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) { 87 | let n = 50_000_000; 88 | let y = utils::get_random_array::(n, f32::MIN, f32::MAX); 89 | c.bench_function("mlttb_p_50M_f32", |b| { 90 | b.iter(|| { 91 | minmaxlttb_mod::minmaxlttb_without_x_parallel( 92 | black_box(y.as_slice()), 93 | black_box(2_000), 94 | black_box(MINMAX_RATIO), 95 | ) 96 | }) 97 | }); 98 | } 99 | 100 | criterion_group!( 101 | benches, 102 | // minmaxlttb_f32_random_array_long_single_core, 103 | // minmaxlttb_f32_random_array_long_multi_core, 104 | minmaxlttb_f32_random_array_50M_single_core, 105 | minmaxlttb_f32_random_array_50M_multi_core, 106 | minmaxlttb_without_x_f32_random_array_50M_single_core, 107 | minmaxlttb_without_x_f32_random_array_50M_multi_core, 108 | // minmaxlttb_f32_random_array_100m 109 | ); 110 | criterion_main!(benches); 111 | -------------------------------------------------------------------------------- /.github/workflows/ci-downsample_rs.yml: -------------------------------------------------------------------------------- 1 | name: CI downsample_rs 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | paths: 9 | - "downsample_rs/**" 10 | - "!downsample_rs/LICENSE" 11 | - "!downsample_rs/README.md" 12 | 13 | defaults: 14 | run: 15 | shell: bash 16 | working-directory: downsample_rs 17 | 18 | jobs: 19 | Check: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v2 24 | 25 | - name: Install Rust toolchain 26 | uses: actions-rs/toolchain@v1 27 | with: 28 | profile: minimal 29 | toolchain: nightly 30 | components: clippy, rustfmt 31 | - name: Setup Rust 32 | run: | 33 | rustup update nightly --no-self-update 34 | rustup default nightly 35 | 36 | - name: Rust toolchain info 37 | run: | 38 | cargo --version --verbose 39 | rustc --version 40 | cargo clippy --version 41 | cargo fmt --version 42 | 43 | - name: check no optional features 44 | run: cargo check --verbose 45 | - name: check with all features 46 | run: cargo check --verbose --all-features 47 | - name: formatting check 48 | run: cargo fmt --all -- --check 49 | # - name: check with clippy 50 | # run: cargo clippy --all --all-targets --all-features -- -D warnings 51 | 52 | Test: 53 | runs-on: ${{ matrix.os }} 54 | strategy: 55 | fail-fast: false 56 | matrix: 57 | os: ["windows-latest", "macOS-latest", "ubuntu-latest"] 58 | rust: ["nightly"] # ['stable', 'beta'] 59 | 60 | steps: 61 | - name: Checkout 62 | uses: actions/checkout@v2 63 | 64 | - name: Install Rust toolchain 65 | uses: actions-rs/toolchain@v1 66 | with: 67 | profile: minimal 68 | toolchain: ${{ matrix.rust }} 69 | - name: Setup Rust 70 | run: | 71 | rustup update nightly --no-self-update 72 | rustup default nightly 73 | 74 | - name: Cache Dependencies 75 | uses: Swatinem/rust-cache@v1 76 | 77 | - name: Run tests (debug) 78 | run: cargo test --verbose --all-features 79 | - name: Run tests (release) 80 | run: cargo test --verbose --all-features --release 81 | 82 | Bench: 83 | runs-on: ${{ matrix.os }} 84 | strategy: 85 | fail-fast: false 86 | matrix: 87 | os: ["ubuntu-latest"] # ['windows-latest', 'macOS-latest'] 88 | rust: ["nightly"] # ['stable', 'beta'] 89 | 90 | steps: 91 | - name: Checkout 92 | uses: actions/checkout@v2 93 | 94 | - name: Install Rust toolchain 95 | uses: actions-rs/toolchain@v1 96 | with: 97 | profile: minimal 98 | toolchain: ${{ matrix.rust }} 99 | - name: Setup Rust 100 | run: | 101 | 
rustup update nightly --no-self-update
102 |           rustup default nightly
103 | 
104 |       - name: Cache Dependencies
105 |         uses: Swatinem/rust-cache@v1
106 | 
107 |       - name: Run benchmarks
108 |         run: cargo bench --quiet --message-format=short --all-features | grep "time:"
109 | 
110 |   Build:
111 |     runs-on: ubuntu-latest
112 |     strategy:
113 |       fail-fast: false
114 |       matrix:
115 |         target:
116 |           # We shouldn't really have any OS-specific code, so think of this as a list of architectures
117 |           - x86_64-unknown-linux-gnu
118 |           - i686-unknown-linux-gnu
119 |           - i586-unknown-linux-gnu
120 |           - aarch64-unknown-linux-gnu
121 |           - armv7-unknown-linux-gnueabihf
122 |           # MIPS is currently not supported anymore on nightly chains.
123 |           # more information:
124 |           #   - https://github.com/rust-lang/compiler-team/issues/648
125 |           #   - https://github.com/rust-lang/rust/pull/113274
126 |           # - mips-unknown-linux-gnu
127 |           # - mips64-unknown-linux-gnuabi64
128 |           - powerpc-unknown-linux-gnu
129 |           - powerpc64-unknown-linux-gnu
130 |           - riscv64gc-unknown-linux-gnu
131 |           - s390x-unknown-linux-gnu
132 |           - sparc64-unknown-linux-gnu
133 |           - wasm32-unknown-unknown
134 | 
135 |     steps:
136 |       - uses: actions/checkout@v2
137 |       - name: Setup Rust
138 |         run: |
139 |           rustup update nightly --no-self-update
140 |           rustup default nightly
141 |           rustup target add ${{ matrix.target }}
142 |           # rustup component add clippy
143 |       # - name: Run Clippy
144 |       #   run: cargo clippy --all-targets --target ${{ matrix.target }}
145 |       - name: Build (release)
146 |         run: cargo build --target ${{ matrix.target }} --release --all-features
147 | 
148 | #       - name: Run cargo-tarpaulin
149 | #         uses: actions-rs/tarpaulin@v0.1
150 | #         with:
151 | #           args: '--features half -- --test-threads 1'
152 | 
153 | #       - name: Upload to codecov.io
154 | #         uses: codecov/codecov-action@v3
155 | #
156 | #
157 | # largely inspired by: https://github.com/rust-lang/portable-simd/blob/master/.github/workflows/ci.yml
158 | 
--------------------------------------------------------------------------------
/tsdownsample/downsamplers.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from typing import Union
3 | 
4 | import numpy as np
5 | 
6 | # ------------------ Rust Downsamplers ------------------
7 | from tsdownsample._rust import _tsdownsample_rs  # type: ignore[attr-defined]
8 | 
9 | from .downsampling_interface import (
10 |     AbstractDownsampler,
11 |     AbstractRustDownsampler,
12 |     AbstractRustNaNDownsampler,
13 | )
14 | 
15 | 
16 | class MinMaxDownsampler(AbstractRustDownsampler):
17 |     """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, these
18 |     are ignored (i.e. the NaNs are not taken into account when selecting data points).
19 | 
20 |     For each bin, the indices of the minimum and maximum values are selected.
21 |     """
22 | 
23 |     @property
24 |     def rust_mod(self):
25 |         return _tsdownsample_rs.minmax
26 | 
27 |     @staticmethod
28 |     def _check_valid_n_out(n_out: int):
29 |         AbstractRustDownsampler._check_valid_n_out(n_out)
30 |         if n_out % 2 != 0:
31 |             raise ValueError("n_out must be even")
32 | 
33 | 
34 | class NaNMinMaxDownsampler(AbstractRustNaNDownsampler):
35 |     """Downsampler that uses the MinMax algorithm. If the y data contains NaNs, the
36 |     indices of these NaNs are returned.
37 | 
38 |     For each bin, the indices of the minimum and maximum values are selected.
39 | """ 40 | 41 | @property 42 | def rust_mod(self): 43 | return _tsdownsample_rs.minmax 44 | 45 | @staticmethod 46 | def _check_valid_n_out(n_out: int): 47 | AbstractRustDownsampler._check_valid_n_out(n_out) 48 | if n_out % 2 != 0: 49 | raise ValueError("n_out must be even") 50 | 51 | 52 | class M4Downsampler(AbstractRustDownsampler): 53 | """Downsampler that uses the M4 algorithm. If the y data contains NaNs, these are 54 | ignored (i.e. the NaNs are not taken into account when selecting data points). 55 | 56 | For each bin, the indices of the first, last, minimum and maximum values are 57 | selected. 58 | """ 59 | 60 | @property 61 | def rust_mod(self): 62 | return _tsdownsample_rs.m4 63 | 64 | @staticmethod 65 | def _check_valid_n_out(n_out: int): 66 | AbstractRustDownsampler._check_valid_n_out(n_out) 67 | if n_out % 4 != 0: 68 | raise ValueError("n_out must be a multiple of 4") 69 | 70 | 71 | class NaNM4Downsampler(AbstractRustNaNDownsampler): 72 | """Downsampler that uses the M4 algorithm. If the y data contains NaNs, the indices 73 | of these NaNs are returned. 74 | 75 | For each bin, the indices of the first, last, minimum and maximum values are 76 | selected. 77 | """ 78 | 79 | @property 80 | def rust_mod(self): 81 | return _tsdownsample_rs.m4 82 | 83 | @staticmethod 84 | def _check_valid_n_out(n_out: int): 85 | AbstractRustDownsampler._check_valid_n_out(n_out) 86 | if n_out % 4 != 0: 87 | raise ValueError("n_out must be a multiple of 4") 88 | 89 | 90 | class LTTBDownsampler(AbstractRustDownsampler): 91 | """Downsampler that uses the LTTB algorithm.""" 92 | 93 | @property 94 | def rust_mod(self): 95 | return _tsdownsample_rs.lttb 96 | 97 | 98 | class MinMaxLTTBDownsampler(AbstractRustDownsampler): 99 | """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs, 100 | these are ignored (i.e. the NaNs are not taken into account when selecting data 101 | points). 102 | 103 | MinMaxLTTB paper: https://arxiv.org/abs/2305.00332 104 | """ 105 | 106 | @property 107 | def rust_mod(self): 108 | return _tsdownsample_rs.minmaxlttb 109 | 110 | def downsample( 111 | self, *args, n_out: int, minmax_ratio: int = 4, parallel: bool = False, **_ 112 | ): 113 | assert minmax_ratio > 0, "minmax_ratio must be greater than 0" 114 | return super().downsample( 115 | *args, n_out=n_out, parallel=parallel, ratio=minmax_ratio 116 | ) 117 | 118 | 119 | class NaNMinMaxLTTBDownsampler(AbstractRustNaNDownsampler): 120 | """Downsampler that uses the MinMaxLTTB algorithm. If the y data contains NaNs, the 121 | indices of these NaNs are returned. 
122 | 123 | MinMaxLTTB paper: https://arxiv.org/abs/2305.00332 124 | """ 125 | 126 | @property 127 | def rust_mod(self): 128 | return _tsdownsample_rs.minmaxlttb 129 | 130 | def downsample( 131 | self, *args, n_out: int, minmax_ratio: int = 4, parallel: bool = False, **_ 132 | ): 133 | assert minmax_ratio > 0, "minmax_ratio must be greater than 0" 134 | return super().downsample( 135 | *args, n_out=n_out, parallel=parallel, ratio=minmax_ratio 136 | ) 137 | 138 | 139 | # ------------------ EveryNth Downsampler ------------------ 140 | 141 | 142 | class EveryNthDownsampler(AbstractDownsampler): 143 | """Downsampler that selects every nth data point""" 144 | 145 | def __init__(self, **kwargs): 146 | super().__init__(check_contiguous=False, **kwargs) 147 | 148 | def _downsample( 149 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **_ 150 | ) -> np.ndarray: 151 | if x is not None: 152 | name = self.__class__.__name__ 153 | warnings.warn( 154 | f"x is passed to downsample method of {name}, but is not taken " 155 | "into account by the current implementation of the EveryNth algorithm." 156 | ) 157 | step = max(1, len(y) / n_out) 158 | return np.arange(start=0, stop=len(y) - 0.1, step=step).astype(np.uint) 159 | -------------------------------------------------------------------------------- /downsample_rs/benches/results: -------------------------------------------------------------------------------- 1 | overlap_random_long_f32 time: [45.116 µs 45.175 µs 45.255 µs] 2 | simple_random_long_f32 time: [24.639 µs 24.711 µs 24.793 µs] 3 | simd_random_long_f32 time: [10.549 µs 10.580 µs 10.615 µs] 4 | 5 | --- 6 | 7 | overlap_random_long_f32 time: [8.9179 ms 8.9405 ms 8.9688 ms] 8 | simple_random_long_f32 time: [7.9809 ms 8.0065 ms 8.0416 ms] 9 | simd_random_long_f32 time: [2.4118 ms 2.4177 ms 2.4242 ms] 10 | 11 | 12 | overlap_random_long_f32 time: [45.010 µs 45.040 µs 45.071 µs] 13 | simple_random_long_f32 time: [22.929 µs 22.979 µs 23.036 µs] 14 | simd_random_long_f32 time: [9.8801 µs 9.8925 µs 9.9034 µs] 15 | overlap_random_10m_f32 time: [9.0349 ms 9.0440 ms 9.0537 ms] 16 | simple_random_10m_f32 time: [7.7164 ms 7.7322 ms 7.7481 ms] 17 | simd_random_10m_f32 time: [2.4348 ms 2.4424 ms 2.4505 ms] 18 | 19 | -> parallel 20 | 21 | overlap_random_long_f32 time: [17.514 µs 17.711 µs 17.960 µs] 22 | simple_random_long_f32 time: [6.9441 µs 6.9717 µs 7.0060 µs] 23 | simd_random_long_f32 time: [33.948 µs 34.284 µs 34.674 µs] 24 | overlap_random_10m_f32 time: [8.9596 ms 8.9664 ms 8.9736 ms] 25 | simple_random_10m_f32 time: [8.4142 ms 8.4373 ms 8.4582 ms] 26 | simd_random_10m_f32 time: [1.5374 ms 1.5640 ms 1.5931 ms] 27 | 28 | 29 | overlap_rand_long_f32 time: [17.789 µs 17.953 µs 18.180 µs] 30 | simple_rand_long_f32 time: [6.8148 µs 6.8297 µs 6.8479 µs] 31 | simd_rand_long_f32 time: [9.5135 µs 9.5796 µs 9.6567 µs] 32 | simple_p_rand_long_f32 time: [34.526 µs 35.000 µs 35.524 µs] 33 | simd_p_rand_long_f32 time: [35.538 µs 36.052 µs 36.584 µs] 34 | overlap_rand_50m_f32 time: [44.472 ms 44.533 ms 44.594 ms] 35 | simple_rand_50m_f32 time: [45.433 ms 45.514 ms 45.589 ms] 36 | simd_rand_50m_f32 time: [15.185 ms 15.221 ms 15.263 ms] 37 | simple_p_rand_50m_f32 time: [6.8875 ms 6.9377 ms 6.9899 ms] 38 | simd_p_rand_50m_f32 time: [7.2611 ms 7.2999 ms 7.3410 ms] 39 | 40 | 41 | 42 | overlap_rand_50m_f32 time: [260.86 ms 262.64 ms 264.74 ms] 43 | simple_rand_50m_f32 time: [97.680 ms 98.330 ms 99.087 ms] 44 | simple_rand__50m_f32 time: [97.766 ms 98.251 ms 98.774 ms] 45 | simd_rand_50m_f32 time: [39.015 ms 
39.545 ms 40.096 ms] 46 | 47 | overlap_rand_50m_f32 time: [87.381 ms 87.739 ms 88.166 ms] 48 | simple_rand_50m_f32 time: [35.099 ms 35.327 ms 35.573 ms] 49 | simple_rand__50m_f32 time: [35.639 ms 35.858 ms 36.104 ms] 50 | simd_rand_50m_f32 time: [16.300 ms 16.498 ms 16.710 ms] 51 | 52 | -> op de redoxv2 server 53 | overlap_rand_50m_f32 time: [118.54 ms 118.67 ms 118.80 ms] 54 | simple_rand_50m_f32 time: [55.846 ms 56.661 ms 57.730 ms] 55 | simple_p_rand_50m_f32 time: [5.8644 ms 5.8852 ms 5.9127 ms] 56 | 57 | 58 | ---- 59 | mmlttb_rand_10m_f32 time: [25.205 ms 25.450 ms 25.754 ms] 60 | mmlttb_p_rand_10m_f32 time: [25.090 ms 25.283 ms 25.498 ms] 61 | mmlttb_rand_50m_f32 time: [41.006 ms 41.430 ms 41.892 ms] 62 | mmlttb_p_rand_50m_f32 time: [39.729 ms 40.115 ms 40.730 ms] 63 | 64 | 65 | simple_rand_50m_f32 time: [36.226 ms 37.007 ms 37.984 ms] 66 | simd_rand_50m_f32 time: [17.323 ms 17.350 ms 17.378 ms] 67 | simple_p_rand_50m_f32 time: [10.506 ms 10.529 ms 10.553 ms] 68 | simd_p_rand_50m_f32 time: [10.102 ms 10.126 ms 10.154 ms] 69 | 70 | mmltb_rand_50mf32 time: [48.331 ms 48.417 ms 48.514 ms] 71 | mmltb_p_rand_50mf32 time: [8.9456 ms 8.9841 ms 9.0265 ms] 72 | mmltb_nox_rand_50mf32 time: [47.381 ms 47.446 ms 47.527 ms] 73 | mmltb_p_nox_rand_50mf32 time: [8.6851 ms 8.7108 ms 8.7373 ms] 74 | 75 | 76 | mmltb_rand_1Bf32 time: [844.96 ms 846.33 ms 847.83 ms] 77 | mmltb_p_rand_1Bf32 time: [151.16 ms 151.48 ms 151.85 ms] 78 | mmltb_nox_rand_1Bf32 time: [842.46 ms 843.47 ms 844.56 ms] 79 | mmltb_p_nox_rand_1Bf32 time: [150.72 ms 151.04 ms 151.40 ms] 80 | --> do not wrap the simple_argminmax into an option function 81 | mmltb_rand_1Bf32 time: [846.90 ms 848.46 ms 850.11 ms] 82 | mmltb_p_rand_1Bf32 time: [147.21 ms 147.99 ms 148.90 ms] 83 | mmltb_nox_rand_1Bf32 time: [843.30 ms 844.33 ms 845.55 ms] 84 | mmltb_p_nox_rand_1Bf32 time: [147.32 ms 148.14 ms 149.07 ms] 85 | ---> optimize the simple_argminmax function 86 | mmltb_rand_1Bf32 time: [786.27 ms 786.83 ms 787.45 ms] 87 | mmltb_p_rand_1Bf32 time: [138.83 ms 139.19 ms 139.58 ms] 88 | mmltb_nox_rand_1Bf32 time: [785.77 ms 786.92 ms 788.46 ms] 89 | mmltb_p_nox_rand_1Bf32 time: [138.14 ms 138.42 ms 138.73 ms] 90 | 91 | --------------------- 92 | 93 | lttb_scal_50M_f32 time: [114.35 ms 114.60 ms 114.88 ms] 94 | lttbnox_scal_50M_f32 time: [127.56 ms 127.70 ms 127.84 ms] 95 | m4_scal_50M_f32 time: [31.478 ms 31.539 ms 31.596 ms] 96 | m4_simd_50M_f32 time: [12.065 ms 12.168 ms 12.270 ms] 97 | m4_scalx_50M_f32 time: [30.513 ms 30.536 ms 30.560 ms] 98 | m4_simdx_50M_f32 time: [12.318 ms 12.430 ms 12.540 ms] 99 | m4_scal_p_50M_f32 time: [9.2012 ms 9.2176 ms 9.2352 ms] 100 | m4_simd_p_50M_f32 time: [9.0070 ms 9.0274 ms 9.0500 ms] 101 | m4_scalx_p_50M_f32 time: [9.7611 ms 9.7895 ms 9.8213 ms] 102 | m4_simdx_p_50M_f32 time: [9.4658 ms 9.4908 ms 9.5187 ms] 103 | minmax_scal_50M_f32 time: [31.585 ms 31.693 ms 31.814 ms] 104 | minmax_simd_50M_f32 time: [11.940 ms 12.046 ms 12.152 ms] 105 | minmax_scalx_50M_f32 time: [30.701 ms 30.769 ms 30.852 ms] 106 | minmax_simdx_50M_f32 time: [12.392 ms 12.499 ms 12.607 ms] 107 | minmax_scal_p_50M_f32 time: [9.2281 ms 9.2515 ms 9.2781 ms] 108 | minmax_simd_p_50M_f32 time: [9.0181 ms 9.0404 ms 9.0645 ms] 109 | minmax_scalx_p_50M_f32 time: [10.075 ms 10.100 ms 10.133 ms] 110 | minmax_simdx_p_50M_f32 time: [9.7846 ms 9.8051 ms 9.8272 ms] 111 | mlttb_scalx_50M_f32 time: [40.820 ms 40.855 ms 40.894 ms] 112 | mlttb_simdx_50M_f32 time: [22.739 ms 22.788 ms 22.843 ms] 113 | mlttb_scalx_p_50M_f32 time: [19.783 ms 19.816 ms 19.851 ms] // 
2x slower bc MinMax with 60k n_out is 2x slower when using x 114 | mlttb_simdx_p_50M_f32 time: [19.713 ms 19.752 ms 19.796 ms] // 2x slower bc MinMax with 60k n_out is 2x slower when using x 115 | mlttbnox_scal_50M_f32 time: [36.289 ms 36.327 ms 36.364 ms] 116 | mlttbnox_simd_50M_f32 time: [16.706 ms 16.744 ms 16.784 ms] 117 | mlttbnox_scal_p_50M_f32 time: [10.252 ms 10.272 ms 10.293 ms] 118 | mlttbnox_simd_p_50M_f32 time: [10.037 ms 10.069 ms 10.105 ms] 119 | -------------------------------------------------------------------------------- /.github/workflows/ci-tsdownsample.yml: -------------------------------------------------------------------------------- 1 | name: CI tsdownsample 2 | 3 | on: 4 | pull_request: {} 5 | push: 6 | branches: 7 | - main 8 | tags: 9 | - '**' 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | 17 | Lint_and_Check: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-python@v4 22 | with: 23 | python-version: '3.10' 24 | - name: Install Rust toolchain 25 | uses: actions-rs/toolchain@v1 26 | with: 27 | profile: minimal 28 | toolchain: nightly 29 | components: clippy, rustfmt 30 | - name: Setup Rust 31 | run: | 32 | rustup update nightly --no-self-update 33 | rustup default nightly 34 | - name: Cache rust 35 | uses: Swatinem/rust-cache@v2 36 | 37 | - run: pip install -r tests/requirements-linting.txt 38 | - run: pip freeze 39 | - run: make lint # Lint Python & Rust 40 | - run: make mypy # Type check Python 41 | 42 | Test: 43 | runs-on: ${{ matrix.os }} 44 | strategy: 45 | fail-fast: false 46 | matrix: 47 | os: ['windows-latest', 'macOS-latest', 'ubuntu-latest'] 48 | rust: ['nightly'] # ['stable', 'beta'] 49 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] 50 | 51 | env: 52 | PYTHON: ${{ matrix.python-version }} 53 | 54 | steps: 55 | - uses: actions/checkout@v4 56 | - uses: actions/setup-python@v5 57 | with: 58 | python-version: ${{ matrix.python-version }} 59 | - run: pip install -r tests/requirements.txt 60 | 61 | - name: Install Rust toolchain 62 | uses: actions-rs/toolchain@v1 63 | with: 64 | profile: minimal 65 | toolchain: nightly 66 | components: clippy, rustfmt 67 | - name: Setup Rust 68 | run: | 69 | rustup update nightly --no-self-update 70 | rustup default nightly 71 | - name: Cache rust 72 | uses: Swatinem/rust-cache@v2 73 | 74 | - name: install develop version 75 | run: make install 76 | 77 | - run: pip install -r tests/requirements.txt 78 | 79 | - run: pip freeze 80 | 81 | - run: make test # Test Python 82 | 83 | - name: Upload coverage to Codecov 84 | uses: codecov/codecov-action@v5 85 | 86 | Build: 87 | # Perhaps smth more in line with this https://github.com/messense/crfs-rs/blob/main/.github/workflows/Python.yml 88 | name: build on ${{ matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }}) 89 | # only run on push to main and on release 90 | if: "success() && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build'))" 91 | strategy: 92 | fail-fast: false 93 | matrix: 94 | os: [ubuntu, macos, windows] 95 | target: [x86_64, aarch64] 96 | manylinux: [auto] 97 | include: 98 | - os: windows 99 | ls: dir 100 | - os: windows 101 | ls: dir 102 | target: i686 103 | python-architecture: x86 104 | - os: macos 105 | target: aarch64 106 | - os: ubuntu 107 | target: i686 108 | # GCC 4.8.5 in manylinux2014 container doesn't support c11 atomic 109 | # we use manylinux_2_24 container 
for aarch64 and armv7 targets instead, 110 | - os: ubuntu 111 | target: aarch64 112 | container: messense/manylinux_2_24-cross:aarch64 113 | - os: ubuntu 114 | target: armv7 115 | container: messense/manylinux_2_24-cross:armv7 116 | - os: ubuntu 117 | target: ppc64le 118 | container: messense/manylinux_2_24-cross:ppc64le 119 | - os: ubuntu 120 | target: s390x 121 | container: messense/manylinux_2_24-cross:s390x 122 | # musllinux 123 | - os: ubuntu 124 | target: x86_64 125 | manylinux: musllinux_1_1 126 | - os: ubuntu 127 | target: aarch64 128 | manylinux: musllinux_1_1 129 | exclude: 130 | # this fails 131 | - os: windows 132 | target: aarch64 133 | 134 | runs-on: ${{ matrix.os }}-latest 135 | steps: 136 | - uses: actions/checkout@v3 137 | 138 | - name: set up python 139 | uses: actions/setup-python@v5 140 | with: 141 | python-version: 3.13 142 | architecture: ${{ matrix.python-architecture || 'x64' }} 143 | 144 | - name: build sdist 145 | if: ${{ matrix.os == 'ubuntu' && matrix.target == 'x86_64' && matrix.manylinux == 'auto' }} 146 | uses: PyO3/maturin-action@v1 147 | with: 148 | command: sdist 149 | args: --out dist 150 | 151 | - name: build wheels 152 | uses: PyO3/maturin-action@v1 153 | with: 154 | rust-toolchain: nightly 155 | target: ${{ matrix.target }} 156 | manylinux: ${{ matrix.manylinux || 'auto' }} 157 | container: ${{ matrix.container }} 158 | args: --release --out dist --interpreter ${{ matrix.interpreter || '3.8 3.9 3.10 3.11 3.12 3.13 3.14' }} 159 | 160 | - run: ${{ matrix.ls || 'ls -lh' }} dist/ 161 | 162 | - uses: actions/upload-artifact@v4 163 | with: 164 | name: pypi_files-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.interpreter || 'all' }}-${{ matrix.manylinux || 'auto' }} 165 | path: dist 166 | 167 | Release: 168 | needs: [Lint_and_Check, Test, Build] 169 | if: "success() && startsWith(github.ref, 'refs/tags/')" 170 | runs-on: ubuntu-latest 171 | 172 | steps: 173 | - uses: actions/checkout@v3 174 | 175 | - name: set up python 176 | uses: actions/setup-python@v4 177 | # with: 178 | # python-version: '3.10' 179 | 180 | - run: pip install -U twine packaging 181 | 182 | - name: get dist artifacts 183 | uses: actions/download-artifact@v4 184 | with: 185 | pattern: pypi_files-* 186 | merge-multiple: true 187 | path: dist 188 | 189 | - run: twine check dist/* 190 | 191 | - name: upload to pypi 192 | run: twine upload dist/* 193 | env: 194 | TWINE_USERNAME: __token__ 195 | TWINE_PASSWORD: ${{ secrets.pypi_token }} 196 | 197 | # https://github.com/samuelcolvin/rtoml/blob/main/.github/workflows/ci.yml 198 | # https://github.com/messense/rjmespath-py/blob/main/.github/workflows/CI.yml 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tsdownsample 2 | 3 | [![PyPI Latest Release](https://img.shields.io/pypi/v/tsdownsample.svg)](https://pypi.org/project/tsdownsample/) 4 | [![support-version](https://img.shields.io/pypi/pyversions/tsdownsample)](https://img.shields.io/pypi/pyversions/tsdownsample) 5 | [![Downloads](https://static.pepy.tech/badge/tsdownsample)](https://pepy.tech/project/tsdownsample) 6 | [![CodeQL](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml) 7 | 
[![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml) 8 | [![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml) 9 | [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?logo=discord&logoColor=white)](https://discord.gg/k2d59GrxPX) 10 | 11 | 12 | 13 | Extremely fast **time series downsampling 📈** for visualization, written in Rust. 14 | 15 | ## Features ✨ 16 | 17 | - **Fast**: written in rust with PyO3 bindings 18 | - leverages optimized [argminmax](https://github.com/jvdd/argminmax) - which is SIMD accelerated with runtime feature detection 19 | - scales linearly with the number of data points 20 | 21 | - multithreaded with Rayon (in Rust) 22 |
23 | Why we do not use Python multiprocessing 24 | Citing the PyO3 docs on parallelism:
25 |
26 | CPython has the infamous Global Interpreter Lock, which prevents several threads from executing Python bytecode in parallel. This makes threading in Python a bad fit for CPU-bound tasks and often forces developers to accept the overhead of multiprocessing. 27 |
28 | In Rust - which is a compiled language - there is no GIL, so CPU-bound tasks can be parallelized (with Rayon) with little to no overhead. 29 |
30 | - **Efficient**: memory efficient 31 | - works on views of the data (no copies) 32 | - no intermediate data structures are created 33 | - **Flexible**: works on any type of data 34 | - supported datatypes are 35 | - for `x`: `f32`, `f64`, `i16`, `i32`, `i64`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64` 36 | - for `y`: `f16`, `f32`, `f64`, `i8`, `i16`, `i32`, `i64`, `u8`, `u16`, `u32`, `u64`, `datetime64`, `timedelta64`, `bool` 37 |
38 | !! 🚀 f16 argminmax is 200-300x faster than numpy
39 | In contrast with all the other data types above, f16 is *not* hardware supported by most modern CPUs (i.e., they provide no native f16 instructions)!!
40 | 🐌 Programming languages therefore typically support this datatype by either (i) upcasting to f32 or (ii) falling back to a software implementation.
41 | 💡 Since argminmax only needs comparisons - and no arithmetic operations - constructing a symmetrical ordinal mapping from f16 to i16 is sufficient. This mapping makes it possible to use the hardware-supported scalar and SIMD i16 instructions - without introducing any memory overhead 🎉
42 | More details are described in argminmax PR #1; a minimal numpy sketch of such an ordinal mapping is shown below. 43 |
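To make that ordinal-mapping idea a bit more concrete, here is a minimal numpy sketch of *one* such order-preserving f16 → i16 mapping. It illustrates the technique only and is not necessarily the exact transformation argminmax uses: reinterpret the f16 bits as i16 and flip the 15 magnitude bits of negative values so that negative numbers compare correctly as plain integers.

```python
import numpy as np

# Illustration only: a symmetrical, order-preserving f16 -> i16 mapping.
a = np.array([-3.0, -0.5, -0.0, 0.0, 1.5, 2.25], dtype=np.float16)

bits = a.view(np.int16)  # reinterpret the raw f16 bits as i16
# Negative floats sort in reverse order when read as integers;
# flipping their 15 magnitude bits restores a consistent ordering.
ordinal = np.where(bits < 0, bits ^ np.int16(0x7FFF), bits)

# argmin/argmax on the i16 "ordinals" match argmin/argmax on the floats
assert np.argmin(ordinal) == np.argmin(a)
assert np.argmax(ordinal) == np.argmax(a)
```

Because only comparisons are involved, the same indices come out whether the comparisons run on the original f16 values or on their i16 ordinals - which is exactly what allows the i16 scalar and SIMD kernels to be reused.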
44 | - **Easy to use**: simple & flexible API 45 | 46 | ## Install 47 | 48 | ```bash 49 | pip install tsdownsample 50 | ``` 51 | 52 | ## Usage 53 | 54 | ```python 55 | from tsdownsample import MinMaxLTTBDownsampler 56 | import numpy as np 57 | 58 | # Create a time series 59 | y = np.random.randn(10_000_000) 60 | x = np.arange(len(y)) 61 | 62 | # Downsample to 1000 points (assuming constant sampling rate) 63 | s_ds = MinMaxLTTBDownsampler().downsample(y, n_out=1000) 64 | 65 | # Select downsampled data 66 | downsampled_y = y[s_ds] 67 | 68 | # Downsample to 1000 points using the (possible irregularly spaced) x-data 69 | s_ds = MinMaxLTTBDownsampler().downsample(x, y, n_out=1000) 70 | 71 | # Select downsampled data 72 | downsampled_x = x[s_ds] 73 | downsampled_y = y[s_ds] 74 | ``` 75 | 76 | ## Downsampling algorithms & API 77 | 78 | ### Downsampling API 📑 79 | 80 | Each downsampling algorithm is implemented as a class that implements a `downsample` method. 81 | The signature of the `downsample` method: 82 | 83 | ``` 84 | downsample([x], y, n_out, **kwargs) -> ndarray[uint64] 85 | ``` 86 | 87 | **Arguments**: 88 | 89 | - `x` is optional 90 | - `x` and `y` are both positional arguments 91 | - `n_out` is a mandatory keyword argument that defines the number of output values* 92 | - `**kwargs` are optional keyword arguments *(see [table below](#downsampling-algorithms-📈))*: 93 | - `parallel`: whether to use multi-threading (default: `False`) 94 | ❗ The max number of threads can be configured with the `TSDOWNSAMPLE_MAX_THREADS` ENV var (e.g. `os.environ["TSDOWNSAMPLE_MAX_THREADS"] = "4"`) 95 | - ... 96 | 97 | **Returns**: a `ndarray[uint64]` of indices that can be used to index the original data. 98 | 99 | \*When there are gaps in the time series, fewer than `n_out` indices may be returned. 100 | 101 | ### Downsampling algorithms 📈 102 | 103 | The following downsampling algorithms (classes) are implemented: 104 | 105 | | Downsampler | Description | `**kwargs` | 106 | | ---:| --- |--- | 107 | | `MinMaxDownsampler` | selects the **min and max** value in each bin | `parallel` | 108 | | `M4Downsampler` | selects the [**min, max, first and last**](https://dl.acm.org/doi/pdf/10.14778/2732951.2732953) value in each bin | `parallel` | 109 | | `LTTBDownsampler` | performs the [**Largest Triangle Three Buckets**](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm | `parallel` | 110 | | `MinMaxLTTBDownsampler` | (*new two-step algorithm 🎉*) first selects `n_out` * `minmax_ratio` **min and max** values, then further reduces these to `n_out` values using the **Largest Triangle Three Buckets** algorithm | `parallel`, `minmax_ratio`* | 111 | 112 | *Default value for `minmax_ratio` is 4, which is empirically proven to be a good default. More details here: https://arxiv.org/abs/2305.00332 113 | 114 | ### Handling NaNs 115 | 116 | This library supports two `NaN`-policies: 117 | 118 | 1. Omit `NaN`s (`NaN`s are ignored during downsampling). 119 | 2. Return index of first `NaN` once there is at least one present in the bin of the considered data. 120 | 121 | | Omit `NaN`s | Return `NaN`s | 122 | | ----------------------: | :------------------------- | 123 | | `MinMaxDownsampler` | `NaNMinMaxDownsampler` | 124 | | `M4Downsampler` | `NaNM4Downsampler` | 125 | | `MinMaxLTTBDownsampler` | `NaNMinMaxLTTBDownsampler` | 126 | | `LTTBDownsampler` | | 127 | 128 | > Note that NaNs are not supported for `x`-data. 129 | 130 | ## Limitations & assumptions 🚨 131 | 132 | Assumes; 133 | 134 | 1. 
`x`-data is (non-strictly) monotonic increasing (i.e., sorted) 135 | 2. no `NaN`s in `x`-data 136 | 137 | --- 138 | 139 |
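As a quick, self-contained illustration of the `parallel` flag, the `TSDOWNSAMPLE_MAX_THREADS` variable and the two NaN policies described above - a sketch only, which assumes that setting the environment variable before the first `downsample` call is early enough for the Rust extension to pick it up:

```python
import os

# Assumed: the thread cap must be set before the first call into the Rust extension.
os.environ["TSDOWNSAMPLE_MAX_THREADS"] = "4"

import numpy as np
from tsdownsample import MinMaxLTTBDownsampler, NaNMinMaxDownsampler

y = np.random.randn(1_000_000)
y[100::10_000] = np.nan  # sprinkle in some NaNs

# Omit-NaN policy, multi-threaded, with the default minmax_ratio of 4
s_omit = MinMaxLTTBDownsampler().downsample(
    y, n_out=1_000, minmax_ratio=4, parallel=True
)

# Return-NaN policy: a bin that contains a NaN yields the index of a NaN sample
s_nan = NaNMinMaxDownsampler().downsample(y, n_out=1_000, parallel=True)

downsampled_y = y[s_omit]  # NaNs were ignored during selection
```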

140 | 👤 Jeroen Van Der Donckt 141 |

142 | -------------------------------------------------------------------------------- /downsample_rs/src/lttb.rs: -------------------------------------------------------------------------------- 1 | use super::helpers::Average; 2 | use super::types::Num; 3 | use num_traits::AsPrimitive; 4 | use std::cmp; 5 | 6 | #[inline(always)] 7 | fn f64_to_i64unsigned(v: f64) -> i64 { 8 | // Transmute to i64 and mask out the sign bit 9 | let v: i64 = unsafe { std::mem::transmute::(v) }; 10 | v & 0x7FFF_FFFF_FFFF_FFFF 11 | } 12 | 13 | // ----------------------------------- NON-PARALLEL ------------------------------------ 14 | 15 | // ----------- WITH X 16 | 17 | pub fn lttb_with_x, Ty: Num + AsPrimitive>( 18 | x: &[Tx], 19 | y: &[Ty], 20 | n_out: usize, 21 | ) -> Vec { 22 | assert_eq!(x.len(), y.len()); 23 | if n_out >= x.len() { 24 | return (0..x.len()).collect::>(); 25 | } 26 | assert!(n_out >= 3); // avoid division by 0 27 | 28 | // Bucket size. Leave room for start and end data points. 29 | let every: f64 = (x.len() - 2) as f64 / (n_out - 2) as f64; 30 | // Initially a is the first point in the triangle. 31 | let mut a: usize = 0; 32 | 33 | let mut sampled_indices: Vec = vec![usize::default(); n_out]; 34 | 35 | // Always add the first point 36 | sampled_indices[0] = 0; 37 | 38 | for i in 0..n_out - 2 { 39 | // Calculate point average for next bucket (containing c). 40 | let avg_range_start = (every * (i + 1) as f64) as usize + 1; 41 | let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, x.len()); 42 | 43 | let y_slice = &y[avg_range_start..avg_range_end]; 44 | let avg_y: f64 = y_slice.average(); 45 | // TODO: avg_y could be approximated argminmax instead of mean? 46 | // TODO: below is faster than above, but not as accurate 47 | // let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0; 48 | let avg_x: f64 = unsafe { 49 | (x.get_unchecked(avg_range_end - 1).as_() + x.get_unchecked(avg_range_start).as_()) 50 | / 2.0 51 | }; 52 | 53 | // Get the range for this bucket 54 | let range_offs = (every * i as f64) as usize + 1; 55 | let range_to = avg_range_start; // = start of the next bucket 56 | 57 | // Point a 58 | let point_ax = unsafe { x.get_unchecked(a).as_() }; 59 | let point_ay = unsafe { y.get_unchecked(a).as_() }; 60 | 61 | let d1 = point_ax - avg_x; 62 | let d2 = avg_y - point_ay; 63 | let offset: f64 = d1 * point_ay + d2 * point_ax; 64 | 65 | let x_slice = &x[range_offs..range_to]; 66 | let y_slice = &y[range_offs..range_to]; 67 | (_, a) = y_slice.iter().zip(x_slice.iter()).enumerate().fold( 68 | (-1i64, a), 69 | |(max_area, a), (i, (y_, x_))| { 70 | // Calculate triangle area over three buckets 71 | // -> area = d1 * (y_ - point_ay) - (point_ax - x_) * d2; 72 | // let area = d1 * y[i].as_() + d2 * x[i].as_() - offset; 73 | // let area = d1 * y_slice[i].as_() + d2 * x_slice[i].as_() - offset; 74 | let area = d1 * y_.as_() + d2 * x_.as_() - offset; 75 | let area = f64_to_i64unsigned(area); // this is faster than abs 76 | if area > max_area { 77 | (area, i) 78 | } else { 79 | (max_area, a) 80 | } 81 | }, 82 | ); 83 | a += range_offs; 84 | 85 | sampled_indices[i + 1] = a; 86 | } 87 | 88 | // Always add the last point 89 | sampled_indices[n_out - 1] = y.len() - 1; 90 | 91 | sampled_indices 92 | } 93 | 94 | // ----------- WITHOUT X 95 | 96 | pub fn lttb_without_x>(y: &[Ty], n_out: usize) -> Vec { 97 | if n_out >= y.len() { 98 | return (0..y.len()).collect::>(); 99 | } 100 | assert!(n_out >= 3); // avoid division by 0 101 | 102 | // Bucket size. 
Leave room for start and end data points. 103 | let every: f64 = (y.len() - 2) as f64 / (n_out - 2) as f64; 104 | // Initially a is the first point in the triangle. 105 | let mut a: usize = 0; 106 | 107 | let mut sampled_indices: Vec = vec![usize::default(); n_out]; 108 | 109 | // Always add the first point 110 | sampled_indices[0] = 0; 111 | 112 | for i in 0..n_out - 2 { 113 | // Calculate point average for next bucket (containing c). 114 | let avg_range_start = (every * (i + 1) as f64) as usize + 1; 115 | let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, y.len()); 116 | 117 | let y_slice = &y[avg_range_start..avg_range_end]; 118 | let avg_y: f64 = y_slice.average(); 119 | let avg_x: f64 = (avg_range_start + avg_range_end - 1) as f64 / 2.0; 120 | 121 | // Get the range for this bucket 122 | let range_offs = (every * i as f64) as usize + 1; 123 | let range_to = avg_range_start; // = start of the next bucket 124 | 125 | // Point a 126 | let point_ay = unsafe { y.get_unchecked(a).as_() }; 127 | let point_ax = a as f64; 128 | 129 | let d1 = point_ax - avg_x; 130 | let d2 = avg_y - point_ay; 131 | let point_ax = point_ax - range_offs as f64; 132 | 133 | // let mut max_area = -1i64; 134 | let mut ax_x = point_ax; // point_ax - x[i] 135 | let offset: f64 = d1 * point_ay; 136 | 137 | // TODO: for some reason is this faster than the loop below -> check if this is true for other devices 138 | let y_slice = &y[range_offs..range_to]; 139 | (_, a) = y_slice 140 | .iter() 141 | .enumerate() 142 | .fold((-1i64, a), |(max_area, a), (i, y)| { 143 | // Calculate triangle area over three buckets 144 | // -> area: f64 = d1 * y[i].as_() - ax_x * d2; 145 | let area: f64 = d1 * y.as_() - ax_x * d2 - offset; 146 | let area: i64 = f64_to_i64unsigned(area); 147 | ax_x -= 1.0; 148 | if area > max_area { 149 | (area, i + range_offs) 150 | } else { 151 | (max_area, a) 152 | } 153 | }); 154 | 155 | // let y_slice = unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) }; 156 | // (_, a) = y_slice 157 | // .iter() 158 | // .enumerate() 159 | // .fold((-1i64, a), |(max_area, a), (i, y_)| { 160 | // // Calculate triangle area over three buckets 161 | // // -> area: f64 = d1 * y[i].as_() - ax_x * d2; 162 | // let area: f64 = d1 * y_.as_() - ax_x * d2 - offset; 163 | // let area: i64 = f64_to_i64unsigned(area); 164 | // ax_x -= 1.0; 165 | // if area > max_area { 166 | // (area, i) 167 | // } else { 168 | // (max_area, a) 169 | // } 170 | // }); 171 | // a += range_offs; 172 | 173 | sampled_indices[i + 1] = a; 174 | } 175 | 176 | // Always add the last point 177 | sampled_indices[n_out - 1] = y.len() - 1; 178 | 179 | sampled_indices 180 | } 181 | 182 | // --------------------------------------- TESTS --------------------------------------- 183 | 184 | #[cfg(test)] 185 | mod tests { 186 | use dev_utils::utils; 187 | 188 | use super::{lttb_with_x, lttb_without_x}; 189 | 190 | #[test] 191 | fn test_lttb_with_x() { 192 | let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 193 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 194 | let sampled_indices = lttb_with_x(&x, &y, 4); 195 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 196 | } 197 | 198 | #[test] 199 | fn test_lttb_without_x() { 200 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 201 | let sampled_indices = lttb_without_x(&y, 4); 202 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 203 | } 204 | 205 | #[test] 206 | fn test_random_same_output() { 207 | for _ in 0..100 { 208 | const N: usize = 5_000; 209 | let 
x: [i32; N] = core::array::from_fn(|i| i as i32); 210 | let y = utils::get_random_array(N, f32::MIN, f32::MAX); 211 | let sampled_indices1 = lttb_with_x(&x, y.as_slice(), 200); 212 | let sampled_indices2 = lttb_without_x(y.as_slice(), 200); 213 | assert_eq!(sampled_indices1, sampled_indices2); 214 | } 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /tsdownsample/_python/downsamplers.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | from ..downsampling_interface import AbstractDownsampler 6 | 7 | 8 | def _get_bin_idxs(x: np.ndarray, nb_bins: int) -> np.ndarray: 9 | """Get the equidistant indices of the bins to use for the aggregation. 10 | 11 | Parameters 12 | ---------- 13 | x : np.ndarray 14 | The x values of the input data. 15 | nb_bins : int 16 | The number of bins. 17 | 18 | Returns 19 | ------- 20 | np.ndarray 21 | The indices of the bins to use for the aggregation. 22 | """ 23 | # Thanks to the `linspace` the data is evenly distributed over the index-range 24 | # The searchsorted function returns the index positions 25 | bins = np.searchsorted(x, np.linspace(x[0], x[-1], nb_bins + 1), side="right") 26 | bins[0] = 0 27 | bins[-1] = len(x) 28 | return np.array(bins) 29 | 30 | 31 | class LTTB_py(AbstractDownsampler): 32 | @staticmethod 33 | def _argmax_area(prev_x, prev_y, avg_next_x, avg_next_y, x_bucket, y_bucket) -> int: 34 | """Vectorized triangular area argmax computation. 35 | 36 | Parameters 37 | ---------- 38 | prev_x : float 39 | The previous selected point is x value. 40 | prev_y : float 41 | The previous selected point its y value. 42 | avg_next_x : float 43 | The x mean of the next bucket 44 | avg_next_y : float 45 | The y mean of the next bucket 46 | x_bucket : np.ndarray 47 | All x values in the bucket 48 | y_bucket : np.ndarray 49 | All y values in the bucket 50 | 51 | Returns 52 | ------- 53 | int 54 | The index of the point with the largest triangular area. 55 | """ 56 | return np.abs( 57 | x_bucket * (prev_y - avg_next_y) 58 | + y_bucket * (avg_next_x - prev_x) 59 | + (prev_x * avg_next_y - avg_next_x * prev_y) 60 | ).argmax() 61 | 62 | def _downsample( 63 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 64 | ) -> np.ndarray: 65 | """TODO complete docs""" 66 | if x is None: 67 | # Is fine for this implementation as this is only used for testing 68 | x = np.arange(y.shape[0]) 69 | 70 | # Bucket size. 
Leave room for start and end data points 71 | block_size = (y.shape[0] - 2) / (n_out - 2) 72 | # Note this 'astype' cast must take place after array creation (and not with the 73 | # aranage() its dtype argument) or it will cast the `block_size` step to an int 74 | # before the arange array creation 75 | offset = np.arange(start=1, stop=y.shape[0], step=block_size).astype(np.int64) 76 | 77 | # Construct the output array 78 | sampled_x = np.empty(n_out, dtype="int64") 79 | sampled_x[0] = 0 80 | sampled_x[-1] = x.shape[0] - 1 81 | 82 | # Convert x & y to int if it is boolean 83 | if x.dtype == np.bool_: 84 | x = x.astype(np.int8) 85 | if y.dtype == np.bool_: 86 | y = y.astype(np.int8) 87 | 88 | a = 0 89 | for i in range(n_out - 3): 90 | a = ( 91 | LTTB_py._argmax_area( 92 | prev_x=x[a], 93 | prev_y=y[a], 94 | avg_next_x=np.mean(x[offset[i + 1] : offset[i + 2]]), 95 | avg_next_y=y[offset[i + 1] : offset[i + 2]].mean(), 96 | x_bucket=x[offset[i] : offset[i + 1]], 97 | y_bucket=y[offset[i] : offset[i + 1]], 98 | ) 99 | + offset[i] 100 | ) 101 | sampled_x[i + 1] = a 102 | 103 | # ------------ EDGE CASE ------------ 104 | # next-average of last bucket = last point 105 | sampled_x[-2] = ( 106 | LTTB_py._argmax_area( 107 | prev_x=x[a], 108 | prev_y=y[a], 109 | avg_next_x=x[-1], # last point 110 | avg_next_y=y[-1], 111 | x_bucket=x[offset[-2] : offset[-1]], 112 | y_bucket=y[offset[-2] : offset[-1]], 113 | ) 114 | + offset[-2] 115 | ) 116 | return sampled_x 117 | 118 | 119 | class MinMax_py(AbstractDownsampler): 120 | """Aggregation method which performs binned min-max aggregation over fully 121 | overlapping windows. 122 | """ 123 | 124 | @staticmethod 125 | def _check_valid_n_out(n_out: int): 126 | assert n_out % 2 == 0, "n_out must be a multiple of 2" 127 | 128 | def _downsample( 129 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 130 | ) -> np.ndarray: 131 | if x is None: 132 | # Is fine for this implementation as this is only used for testing 133 | x = np.arange(y.shape[0]) 134 | 135 | xdt = x.dtype 136 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 137 | x = x.view(np.int64) 138 | 139 | bins = _get_bin_idxs(x, n_out // 2) 140 | 141 | rel_idxs = [] 142 | for lower, upper in zip(bins, bins[1:]): 143 | y_slice = y[lower:upper] 144 | if not len(y_slice): 145 | continue 146 | # calculate the argmin(slice) & argmax(slice) 147 | rel_idxs.append(lower + np.nanargmin(y_slice)) 148 | rel_idxs.append(lower + np.nanargmax(y_slice)) 149 | return np.unique(rel_idxs) 150 | 151 | 152 | class NaNMinMax_py(AbstractDownsampler): 153 | @staticmethod 154 | def _check_valid_n_out(n_out: int): 155 | assert n_out % 2 == 0, "n_out must be a multiple of 2" 156 | 157 | def _downsample( 158 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 159 | ) -> np.ndarray: 160 | if x is None: 161 | # Is fine for this implementation as this is only used for testing 162 | x = np.arange(y.shape[0]) 163 | 164 | xdt = x.dtype 165 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 166 | x = x.view(np.int64) 167 | 168 | bins = _get_bin_idxs(x, n_out // 2) 169 | 170 | rel_idxs = [] 171 | for lower, upper in zip(bins, bins[1:]): 172 | y_slice = y[lower:upper] 173 | if not len(y_slice): 174 | continue 175 | # calculate the argmin(slice) & argmax(slice) 176 | rel_idxs.append(lower + np.argmin(y_slice)) 177 | rel_idxs.append(lower + np.argmax(y_slice)) 178 | return np.array(sorted(rel_idxs)) 179 | 180 | 181 | class M4_py(AbstractDownsampler): 
182 | """Aggregation method which selects the 4 M-s, i.e y-argmin, y-argmax, x-argmin, and 183 | x-argmax per bin. 184 | 185 | .. note:: 186 | When `n_out` is 4 * the canvas its pixel widht it should create a pixel-perfect 187 | visualization w.r.t. the raw data. 188 | 189 | """ 190 | 191 | @staticmethod 192 | def _check_valid_n_out(n_out: int): 193 | assert n_out % 4 == 0, "n_out must be a multiple of 4" 194 | 195 | def _downsample( 196 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 197 | ) -> np.ndarray: 198 | """TODO complete docs""" 199 | if x is None: 200 | # Is fine for this implementation as this is only used for testing 201 | x = np.arange(y.shape[0]) 202 | 203 | xdt = x.dtype 204 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 205 | x = x.view(np.int64) 206 | 207 | bins = _get_bin_idxs(x, n_out // 4) 208 | 209 | rel_idxs = [] 210 | for lower, upper in zip(bins, bins[1:]): 211 | y_slice = y[lower:upper] 212 | if not len(y_slice): 213 | continue 214 | 215 | # calculate the min(idx), argmin(slice), argmax(slice), max(idx) 216 | rel_idxs.append(lower) 217 | rel_idxs.append(lower + np.nanargmin(y_slice)) 218 | rel_idxs.append(lower + np.nanargmax(y_slice)) 219 | rel_idxs.append(upper - 1) 220 | 221 | # NOTE: we do not use the np.unique so that all indices are retained 222 | return np.array(sorted(rel_idxs)) 223 | 224 | 225 | class NaNM4_py(AbstractDownsampler): 226 | @staticmethod 227 | def _check_valid_n_out(n_out: int): 228 | assert n_out % 4 == 0, "n_out must be a multiple of 4" 229 | 230 | def _downsample( 231 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 232 | ) -> np.ndarray: 233 | """TODO complete docs""" 234 | if x is None: 235 | # Is fine for this implementation as this is only used for testing 236 | x = np.arange(y.shape[0]) 237 | 238 | xdt = x.dtype 239 | if np.issubdtype(xdt, np.datetime64) or np.issubdtype(xdt, np.timedelta64): 240 | x = x.view(np.int64) 241 | 242 | bins = _get_bin_idxs(x, n_out // 4) 243 | 244 | rel_idxs = [] 245 | for lower, upper in zip(bins, bins[1:]): 246 | y_slice = y[lower:upper] 247 | if not len(y_slice): 248 | continue 249 | 250 | # calculate the min(idx), argmin(slice), argmax(slice), max(idx) 251 | rel_idxs.append(lower) 252 | rel_idxs.append(lower + y_slice.argmin()) 253 | rel_idxs.append(lower + y_slice.argmax()) 254 | rel_idxs.append(upper - 1) 255 | 256 | # NOTE: we do not use the np.unique so that all indices are retained 257 | return np.array(sorted(rel_idxs)) 258 | -------------------------------------------------------------------------------- /downsample_rs/src/minmaxlttb.rs: -------------------------------------------------------------------------------- 1 | use argminmax::{ArgMinMax, NaNArgMinMax}; 2 | 3 | use super::lttb::{lttb_with_x, lttb_without_x}; 4 | use super::types::Num; 5 | 6 | use super::minmax; 7 | use num_traits::{AsPrimitive, FromPrimitive}; 8 | 9 | // ----------------------------------- NON-PARALLEL ------------------------------------ 10 | 11 | // ----------- WITH X 12 | 13 | macro_rules! 
minmaxlttb_with_x { 14 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 15 | pub fn $func_name( 16 | x: &[Tx], 17 | y: &[Ty], 18 | n_out: usize, 19 | minmax_ratio: usize, 20 | ) -> Vec 21 | where 22 | for<'a> &'a [Ty]: $trait, 23 | Tx: Num + AsPrimitive + FromPrimitive, 24 | Ty: Num + AsPrimitive, 25 | { 26 | minmaxlttb_generic(x, y, n_out, minmax_ratio, $f_minmax) 27 | } 28 | }; 29 | } 30 | 31 | minmaxlttb_with_x!(minmaxlttb_with_x, ArgMinMax, minmax::min_max_with_x); 32 | minmaxlttb_with_x!( 33 | minmaxlttb_with_x_nan, 34 | NaNArgMinMax, 35 | minmax::min_max_with_x_nan 36 | ); 37 | 38 | // ----------- WITHOUT X 39 | 40 | macro_rules! minmaxlttb_without_x { 41 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 42 | pub fn $func_name>( 43 | y: &[Ty], 44 | n_out: usize, 45 | minmax_ratio: usize, 46 | ) -> Vec 47 | where 48 | for<'a> &'a [Ty]: $trait, 49 | { 50 | minmaxlttb_generic_without_x(y, n_out, minmax_ratio, $f_minmax) 51 | } 52 | }; 53 | } 54 | 55 | minmaxlttb_without_x!(minmaxlttb_without_x, ArgMinMax, minmax::min_max_without_x); 56 | minmaxlttb_without_x!( 57 | minmaxlttb_without_x_nan, 58 | NaNArgMinMax, 59 | minmax::min_max_without_x_nan 60 | ); 61 | 62 | // ------------------------------------- PARALLEL -------------------------------------- 63 | 64 | // ----------- WITH X 65 | 66 | macro_rules! minmaxlttb_with_x_parallel { 67 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 68 | pub fn $func_name( 69 | x: &[Tx], 70 | y: &[Ty], 71 | n_out: usize, 72 | minmax_ratio: usize, 73 | ) -> Vec 74 | where 75 | for<'a> &'a [Ty]: $trait, 76 | Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, 77 | Ty: Num + AsPrimitive + Send + Sync, 78 | { 79 | minmaxlttb_generic(x, y, n_out, minmax_ratio, $f_minmax) 80 | } 81 | }; 82 | } 83 | 84 | minmaxlttb_with_x_parallel!( 85 | minmaxlttb_with_x_parallel, 86 | ArgMinMax, 87 | minmax::min_max_with_x_parallel 88 | ); 89 | minmaxlttb_with_x_parallel!( 90 | minmaxlttb_with_x_parallel_nan, 91 | NaNArgMinMax, 92 | minmax::min_max_with_x_parallel_nan 93 | ); 94 | 95 | // ----------- WITHOUT X 96 | 97 | macro_rules! 
minmaxlttb_without_x_parallel { 98 | ($func_name:ident, $trait:ident, $f_minmax:expr) => { 99 | pub fn $func_name + Send + Sync>( 100 | y: &[Ty], 101 | n_out: usize, 102 | minmax_ratio: usize, 103 | ) -> Vec 104 | where 105 | for<'a> &'a [Ty]: $trait, 106 | { 107 | minmaxlttb_generic_without_x(y, n_out, minmax_ratio, $f_minmax) 108 | } 109 | }; 110 | } 111 | 112 | minmaxlttb_without_x_parallel!( 113 | minmaxlttb_without_x_parallel, 114 | ArgMinMax, 115 | minmax::min_max_without_x_parallel 116 | ); 117 | minmaxlttb_without_x_parallel!( 118 | minmaxlttb_without_x_parallel_nan, 119 | NaNArgMinMax, 120 | minmax::min_max_without_x_parallel_nan 121 | ); 122 | 123 | // ----------------------------------- GENERICS ------------------------------------ 124 | 125 | #[inline(always)] 126 | pub(crate) fn minmaxlttb_generic, Ty: Num + AsPrimitive>( 127 | x: &[Tx], 128 | y: &[Ty], 129 | n_out: usize, 130 | minmax_ratio: usize, 131 | f_minmax: fn(&[Tx], &[Ty], usize) -> Vec, 132 | ) -> Vec { 133 | assert_eq!(x.len(), y.len()); 134 | assert!(minmax_ratio > 1); 135 | // Apply first min max aggregation (if above ratio) 136 | if x.len() / n_out > minmax_ratio { 137 | // Get index of min max points 138 | let mut index = f_minmax( 139 | &x[1..(x.len() - 1)], 140 | &y[1..(x.len() - 1)], 141 | n_out * minmax_ratio, 142 | ); 143 | // inplace + 1 144 | index.iter_mut().for_each(|elem| *elem += 1); 145 | // Prepend first and last point 146 | index.insert(0, 0); 147 | index.push(x.len() - 1); 148 | // Get x and y values at index 149 | let x = unsafe { 150 | index 151 | .iter() 152 | .map(|i| *x.get_unchecked(*i)) 153 | .collect::>() 154 | }; 155 | let y = unsafe { 156 | index 157 | .iter() 158 | .map(|i| *y.get_unchecked(*i)) 159 | .collect::>() 160 | }; 161 | // Apply lttb on the reduced data 162 | let index_points_selected = lttb_with_x(x.as_slice(), y.as_slice(), n_out); 163 | // Return the original index 164 | return index_points_selected 165 | .iter() 166 | .map(|i| index[*i]) 167 | .collect::>(); 168 | } 169 | // Apply lttb on all data when requirement is not met 170 | lttb_with_x(x, y, n_out) 171 | } 172 | 173 | #[inline(always)] 174 | pub(crate) fn minmaxlttb_generic_without_x>( 175 | y: &[Ty], 176 | n_out: usize, 177 | minmax_ratio: usize, 178 | f_minmax: fn(&[Ty], usize) -> Vec, 179 | ) -> Vec { 180 | assert!(minmax_ratio > 1); 181 | // Apply first min max aggregation (if above ratio) 182 | if y.len() / n_out > minmax_ratio { 183 | // Get index of min max points 184 | let mut index = f_minmax(&y[1..(y.len() - 1)], n_out * minmax_ratio); 185 | // inplace + 1 186 | index.iter_mut().for_each(|elem| *elem += 1); 187 | // Prepend first and last point 188 | index.insert(0, 0); 189 | index.push(y.len() - 1); 190 | // Get y values at index 191 | let y = unsafe { 192 | index 193 | .iter() 194 | .map(|i| *y.get_unchecked(*i)) 195 | .collect::>() 196 | }; 197 | // Apply lttb on the reduced data (using the preselect data its index) 198 | let index_points_selected = lttb_with_x(index.as_slice(), y.as_slice(), n_out); 199 | // Return the original index 200 | return index_points_selected 201 | .iter() 202 | .map(|i| index[*i]) 203 | .collect::>(); 204 | } 205 | // Apply lttb on all data when requirement is not met 206 | lttb_without_x(y, n_out).to_vec() 207 | } 208 | 209 | #[cfg(test)] 210 | mod tests { 211 | use rstest::rstest; 212 | use rstest_reuse::{self, *}; 213 | 214 | use super::{minmaxlttb_with_x, minmaxlttb_without_x}; 215 | use super::{minmaxlttb_with_x_parallel, minmaxlttb_without_x_parallel}; 216 | 217 | use 
dev_utils::utils; 218 | 219 | fn get_array_f32(n: usize) -> Vec { 220 | utils::get_random_array(n, f32::MIN, f32::MAX) 221 | } 222 | 223 | // Template for n_out 224 | #[template] 225 | #[rstest] 226 | #[case(98)] 227 | #[case(100)] 228 | #[case(102)] 229 | fn n_outs(#[case] n_out: usize) {} 230 | 231 | #[test] 232 | fn test_minmaxlttb_with_x() { 233 | let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 234 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 235 | let sampled_indices = minmaxlttb_with_x(&x, &y, 4, 2); 236 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 237 | } 238 | 239 | #[test] 240 | fn test_minmaxlttb_without_x() { 241 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 242 | let sampled_indices = minmaxlttb_without_x(&y, 4, 2); 243 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 244 | } 245 | 246 | #[test] 247 | fn test_minmaxlttb_with_x_parallel() { 248 | let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 249 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 250 | let sampled_indices = minmaxlttb_with_x_parallel(&x, &y, 4, 2); 251 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 252 | } 253 | 254 | #[test] 255 | fn test_minmaxlttb_without_x_parallel() { 256 | let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; 257 | let sampled_indices = minmaxlttb_without_x_parallel(&y, 4, 2); 258 | assert_eq!(sampled_indices, vec![0, 1, 5, 9]); 259 | } 260 | 261 | #[test] 262 | fn test_same_output() { 263 | let N: usize = 2001; 264 | let n_out: usize = 100; 265 | let y = (0..N).map(|v| v as f32).collect::>(); 266 | let x = (0..N as i32).collect::>(); 267 | let sampled_indices1 = minmaxlttb_with_x(&x, &y, n_out, 4); 268 | let sampled_indices2 = minmaxlttb_without_x(&y, n_out, 4); 269 | assert_eq!(sampled_indices1, sampled_indices2); 270 | 271 | let N: usize = 1001; 272 | let n_out: usize = 26; 273 | let y = (0..N).map(|v| v as f32).collect::>(); 274 | let x = (0..N as i32).collect::>(); 275 | let sampled_indices1 = minmaxlttb_with_x(&x, &y, n_out, 4); 276 | let sampled_indices2 = minmaxlttb_without_x(&y, n_out, 4); 277 | assert_eq!(sampled_indices1, sampled_indices2); 278 | } 279 | 280 | #[apply(n_outs)] 281 | fn test_many_random_runs_same_output(n_out: usize) { 282 | const N: usize = 20_000; 283 | const MINMAX_RATIO: usize = 5; 284 | for _ in 0..100 { 285 | // TODO: test with x 286 | let arr = get_array_f32(N); 287 | let idxs1 = minmaxlttb_without_x(arr.as_slice(), n_out, MINMAX_RATIO); 288 | let idxs2 = minmaxlttb_without_x_parallel(arr.as_slice(), n_out, MINMAX_RATIO); 289 | assert_eq!(idxs1, idxs2); 290 | } 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /tests/benchmarks/test_downsamplers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from tsdownsample import ( 5 | EveryNthDownsampler, 6 | LTTBDownsampler, 7 | M4Downsampler, 8 | MinMaxDownsampler, 9 | MinMaxLTTBDownsampler, 10 | NaNM4Downsampler, 11 | NaNMinMaxDownsampler, 12 | NaNMinMaxLTTBDownsampler, 13 | ) 14 | 15 | NB_SAMPLES = ["100,000", "1,000,000"] 16 | N_OUT = ["100", "1,000", "5,000"] 17 | Y_DTYPES = [np.float32, np.float64] + [np.int32, np.int64] 18 | 19 | 20 | # --------------------------------------------------------------------------- # 21 | # MinMaxDownsampler 22 | # --------------------------------------------------------------------------- # 23 | 24 | 25 | @pytest.mark.benchmark(group="minmax") 26 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 27 | 
@pytest.mark.parametrize("n_out", N_OUT) 28 | @pytest.mark.parametrize("dtype", Y_DTYPES) 29 | @pytest.mark.parametrize("parallel", [False, True]) 30 | def test_minmax_no_x(benchmark, n_samples, n_out, dtype, parallel): 31 | """Test the MinMaxDownsampler.""" 32 | downsampler = MinMaxDownsampler() 33 | n_samples = int(n_samples.replace(",", "")) 34 | n_out = int(n_out.replace(",", "")) 35 | 36 | y = np.random.randn(n_samples).astype(dtype) 37 | 38 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 39 | 40 | 41 | @pytest.mark.benchmark(group="minmax") 42 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 43 | @pytest.mark.parametrize("n_out", N_OUT) 44 | @pytest.mark.parametrize("dtype", Y_DTYPES) 45 | @pytest.mark.parametrize("parallel", [False, True]) 46 | def test_minmax_with_x(benchmark, n_samples, n_out, dtype, parallel): 47 | """Test the MinMaxDownsampler.""" 48 | downsampler = MinMaxDownsampler() 49 | n_samples = int(n_samples.replace(",", "")) 50 | n_out = int(n_out.replace(",", "")) 51 | 52 | x = np.arange(n_samples) 53 | y = np.random.randn(n_samples).astype(dtype) 54 | 55 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 56 | 57 | 58 | @pytest.mark.benchmark(group="nanminmax") 59 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 60 | @pytest.mark.parametrize("n_out", N_OUT) 61 | @pytest.mark.parametrize("dtype", Y_DTYPES) 62 | @pytest.mark.parametrize("parallel", [False, True]) 63 | def test_nanminmax_no_x(benchmark, n_samples, n_out, dtype, parallel): 64 | """Test the MinMaxDownsampler.""" 65 | downsampler = NaNMinMaxDownsampler() 66 | n_samples = int(n_samples.replace(",", "")) 67 | n_out = int(n_out.replace(",", "")) 68 | 69 | y = np.random.randn(n_samples).astype(dtype) 70 | 71 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 72 | 73 | 74 | @pytest.mark.benchmark(group="nanminmax") 75 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 76 | @pytest.mark.parametrize("n_out", N_OUT) 77 | @pytest.mark.parametrize("dtype", Y_DTYPES) 78 | @pytest.mark.parametrize("parallel", [False, True]) 79 | def test_nanminmax_with_x(benchmark, n_samples, n_out, dtype, parallel): 80 | """Test the MinMaxDownsampler.""" 81 | downsampler = NaNMinMaxDownsampler() 82 | n_samples = int(n_samples.replace(",", "")) 83 | n_out = int(n_out.replace(",", "")) 84 | 85 | x = np.arange(n_samples) 86 | y = np.random.randn(n_samples).astype(dtype) 87 | 88 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 89 | 90 | 91 | # --------------------------------------------------------------------------- # 92 | # M4Downsampler 93 | # --------------------------------------------------------------------------- # 94 | 95 | 96 | @pytest.mark.benchmark(group="m4") 97 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 98 | @pytest.mark.parametrize("n_out", N_OUT) 99 | @pytest.mark.parametrize("dtype", Y_DTYPES) 100 | @pytest.mark.parametrize("parallel", [False, True]) 101 | def test_m4_no_x(benchmark, n_samples, n_out, dtype, parallel): 102 | """Test the M4Downsampler.""" 103 | downsampler = M4Downsampler() 104 | n_samples = int(n_samples.replace(",", "")) 105 | n_out = int(n_out.replace(",", "")) 106 | 107 | y = np.random.randn(n_samples).astype(dtype) 108 | 109 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 110 | 111 | 112 | @pytest.mark.benchmark(group="m4") 113 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 114 | @pytest.mark.parametrize("n_out", N_OUT) 115 | @pytest.mark.parametrize("dtype", Y_DTYPES) 
116 | @pytest.mark.parametrize("parallel", [False, True]) 117 | def test_m4_with_x(benchmark, n_samples, n_out, dtype, parallel): 118 | """Test the M4Downsampler.""" 119 | downsampler = M4Downsampler() 120 | n_samples = int(n_samples.replace(",", "")) 121 | n_out = int(n_out.replace(",", "")) 122 | 123 | x = np.arange(n_samples) 124 | y = np.random.randn(n_samples).astype(dtype) 125 | 126 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 127 | 128 | 129 | @pytest.mark.benchmark(group="nanm4") 130 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 131 | @pytest.mark.parametrize("n_out", N_OUT) 132 | @pytest.mark.parametrize("dtype", Y_DTYPES) 133 | @pytest.mark.parametrize("parallel", [False, True]) 134 | def test_nanm4_no_x(benchmark, n_samples, n_out, dtype, parallel): 135 | """Test the M4Downsampler.""" 136 | downsampler = NaNM4Downsampler() 137 | n_samples = int(n_samples.replace(",", "")) 138 | n_out = int(n_out.replace(",", "")) 139 | 140 | y = np.random.randn(n_samples).astype(dtype) 141 | 142 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 143 | 144 | 145 | @pytest.mark.benchmark(group="nanm4") 146 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 147 | @pytest.mark.parametrize("n_out", N_OUT) 148 | @pytest.mark.parametrize("dtype", Y_DTYPES) 149 | @pytest.mark.parametrize("parallel", [False, True]) 150 | def test_nanm4_with_x(benchmark, n_samples, n_out, dtype, parallel): 151 | """Test the M4Downsampler.""" 152 | downsampler = NaNM4Downsampler() 153 | n_samples = int(n_samples.replace(",", "")) 154 | n_out = int(n_out.replace(",", "")) 155 | 156 | x = np.arange(n_samples) 157 | y = np.random.randn(n_samples).astype(dtype) 158 | 159 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 160 | 161 | 162 | # --------------------------------------------------------------------------- # 163 | # LTTBDownsampler 164 | # --------------------------------------------------------------------------- # 165 | 166 | 167 | @pytest.mark.benchmark(group="lttb") 168 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 169 | @pytest.mark.parametrize("n_out", N_OUT) 170 | @pytest.mark.parametrize("dtype", Y_DTYPES) 171 | @pytest.mark.parametrize("parallel", [False, True]) 172 | def test_lttb_no_x(benchmark, n_samples, n_out, dtype, parallel): 173 | """Test the LTTBDownsampler.""" 174 | downsampler = LTTBDownsampler() 175 | n_samples = int(n_samples.replace(",", "")) 176 | n_out = int(n_out.replace(",", "")) 177 | 178 | y = np.random.randn(n_samples).astype(dtype) 179 | 180 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 181 | 182 | 183 | @pytest.mark.benchmark(group="lttb") 184 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 185 | @pytest.mark.parametrize("n_out", N_OUT) 186 | @pytest.mark.parametrize("dtype", Y_DTYPES) 187 | @pytest.mark.parametrize("parallel", [False, True]) 188 | def test_lttb_with_x(benchmark, n_samples, n_out, dtype, parallel): 189 | """Test the LTTBDownsampler.""" 190 | downsampler = LTTBDownsampler() 191 | n_samples = int(n_samples.replace(",", "")) 192 | n_out = int(n_out.replace(",", "")) 193 | 194 | x = np.arange(n_samples) 195 | y = np.random.randn(n_samples).astype(dtype) 196 | 197 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 198 | 199 | 200 | # --------------------------------------------------------------------------- # 201 | # MinMaxLTTBDownsampler 202 | # --------------------------------------------------------------------------- # 203 | 204 | 205 
| @pytest.mark.benchmark(group="minmaxlttb") 206 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 207 | @pytest.mark.parametrize("n_out", N_OUT) 208 | @pytest.mark.parametrize("dtype", Y_DTYPES) 209 | @pytest.mark.parametrize("parallel", [False, True]) 210 | def test_minmaxlttb_no_x(benchmark, n_samples, n_out, dtype, parallel): 211 | """Test the MinMaxLTTBDownsampler.""" 212 | downsampler = MinMaxLTTBDownsampler() 213 | n_samples = int(n_samples.replace(",", "")) 214 | n_out = int(n_out.replace(",", "")) 215 | 216 | y = np.random.randn(n_samples).astype(dtype) 217 | 218 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 219 | 220 | 221 | @pytest.mark.benchmark(group="minmaxlttb") 222 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 223 | @pytest.mark.parametrize("n_out", N_OUT) 224 | @pytest.mark.parametrize("dtype", Y_DTYPES) 225 | @pytest.mark.parametrize("parallel", [False, True]) 226 | def test_minmaxlttb_with_x(benchmark, n_samples, n_out, dtype, parallel): 227 | """Test the MinMaxLTTBDownsampler.""" 228 | downsampler = MinMaxLTTBDownsampler() 229 | n_samples = int(n_samples.replace(",", "")) 230 | n_out = int(n_out.replace(",", "")) 231 | 232 | x = np.arange(n_samples) 233 | y = np.random.randn(n_samples).astype(dtype) 234 | 235 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 236 | 237 | 238 | @pytest.mark.benchmark(group="nanminmaxlttb") 239 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 240 | @pytest.mark.parametrize("n_out", N_OUT) 241 | @pytest.mark.parametrize("dtype", Y_DTYPES) 242 | @pytest.mark.parametrize("parallel", [False, True]) 243 | def test_nanminmaxlttb_no_x(benchmark, n_samples, n_out, dtype, parallel): 244 | """Test the MinMaxLTTBDownsampler.""" 245 | downsampler = NaNMinMaxLTTBDownsampler() 246 | n_samples = int(n_samples.replace(",", "")) 247 | n_out = int(n_out.replace(",", "")) 248 | 249 | y = np.random.randn(n_samples).astype(dtype) 250 | 251 | benchmark(downsampler.downsample, y, n_out=n_out, parallel=parallel) 252 | 253 | 254 | @pytest.mark.benchmark(group="nanminmaxlttb") 255 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 256 | @pytest.mark.parametrize("n_out", N_OUT) 257 | @pytest.mark.parametrize("dtype", Y_DTYPES) 258 | @pytest.mark.parametrize("parallel", [False, True]) 259 | def test_nanminmaxlttb_with_x(benchmark, n_samples, n_out, dtype, parallel): 260 | """Test the MinMaxLTTBDownsampler.""" 261 | downsampler = NaNMinMaxLTTBDownsampler() 262 | n_samples = int(n_samples.replace(",", "")) 263 | n_out = int(n_out.replace(",", "")) 264 | 265 | x = np.arange(n_samples) 266 | y = np.random.randn(n_samples).astype(dtype) 267 | 268 | benchmark(downsampler.downsample, x, y, n_out=n_out, parallel=parallel) 269 | 270 | 271 | # --------------------------------------------------------------------------- # 272 | # EveryNthDownsampler 273 | # --------------------------------------------------------------------------- # 274 | 275 | 276 | @pytest.mark.benchmark(group="everynth") 277 | @pytest.mark.parametrize("n_samples", NB_SAMPLES) 278 | @pytest.mark.parametrize("n_out", N_OUT) 279 | def test_everynth(benchmark, n_samples, n_out): 280 | """Test the EveryNthDownsampler.""" 281 | downsampler = EveryNthDownsampler() 282 | n_samples = int(n_samples.replace(",", "")) 283 | n_out = int(n_out.replace(",", "")) 284 | 285 | y = np.random.randn(n_samples) 286 | 287 | benchmark(downsampler.downsample, y, n_out=n_out) 288 | -------------------------------------------------------------------------------- 
/downsample_rs/src/searchsorted.rs: -------------------------------------------------------------------------------- 1 | use rayon::iter::IndexedParallelIterator; 2 | use rayon::prelude::*; 3 | 4 | use super::types::Num; 5 | use super::POOL; 6 | use num_traits::{AsPrimitive, FromPrimitive}; 7 | 8 | const EPSILON: f64 = 1e-12; // Small value to avoid precision errors 9 | 10 | // ---------------------- Binary search ---------------------- 11 | 12 | /// Binary search for the index position of the given value in the given array. 13 | /// The array must be sorted in ascending order and contain no duplicates. 14 | /// 15 | /// Complies with the Python bisect function 16 | /// https://docs.python.org/3/library/bisect.html#bisect.bisect 17 | /// 18 | // #[inline(always)] 19 | fn binary_search(arr: &[T], value: T, left: usize, right: usize) -> usize { 20 | let mut size: usize = right - left; 21 | let mut left: usize = left; 22 | let mut right: usize = right; 23 | // Return the index where the value is >= arr[index] and arr[index-1] < value 24 | while left < right { 25 | let mid = left + size / 2; 26 | if arr[mid] < value { 27 | left = mid + 1; 28 | } else { 29 | right = mid; 30 | } 31 | size = right - left; 32 | } 33 | if arr[left] <= value { 34 | left + 1 35 | } else { 36 | left 37 | } 38 | } 39 | 40 | /// Binary search for the index position of the given value in the given array. 41 | /// The array must be sorted in ascending order and contain no duplicates. 42 | /// 43 | /// The mid index is pre-guessed to speed up the search. 44 | /// 45 | /// Complies with the Python bisect function 46 | /// https://docs.python.org/3/library/bisect.html#bisect.bisect 47 | /// 48 | // #[inline(always)] 49 | fn binary_search_with_mid( 50 | arr: &[T], 51 | value: T, 52 | left: usize, 53 | right: usize, 54 | mid: usize, 55 | ) -> usize { 56 | assert!(mid >= left || mid <= right); 57 | let mut left: usize = left; 58 | let mut right: usize = right; 59 | let mut mid: usize = mid; 60 | // Return the index where the value is <= arr[index] and arr[index+1] < value 61 | while left < right { 62 | if arr[mid] < value { 63 | left = mid + 1; 64 | } else { 65 | right = mid; 66 | } 67 | let size = right - left; 68 | mid = left + size / 2; 69 | } 70 | if arr[left] <= value { 71 | left + 1 72 | } else { 73 | left 74 | } 75 | } 76 | 77 | // ------------------- Equidistant binning -------------------- 78 | 79 | #[inline(always)] 80 | fn sequential_add_mul(start_val: f64, add_val: f64, mul: usize, epsilon: f64) -> f64 { 81 | // start_val + add_val * mul will sometimes overflow when add_val * mul is 82 | // larger than the largest positive f64 number. 83 | // This code should not fail when: (f64::MAX - start_val) < (add_val * mul). 84 | // -> Note that f64::MAX - start_val can be up to 2 * f64::MAX. 85 | let mul_2: f64 = mul as f64 / 2.0; 86 | // start_val + add_val * mul_2 as f64 + add_val * (mul - mul_2) as f64 87 | start_val + add_val * mul_2 + add_val * mul_2 + epsilon 88 | } 89 | 90 | // --- Sequential version 91 | 92 | pub(crate) fn get_equidistant_bin_idx_iterator( 93 | arr: &[T], 94 | nb_bins: usize, 95 | ) -> impl Iterator> + '_ 96 | where 97 | T: Num + FromPrimitive + AsPrimitive, 98 | { 99 | assert!(nb_bins >= 2); 100 | // 1. Compute the step between each bin 101 | // Divide by nb_bins to avoid overflow! 
102 | let val_step: f64 = 103 | (arr[arr.len() - 1].as_() / nb_bins as f64) - (arr[0].as_() / nb_bins as f64); 104 | // Estimate the step between each index (used to pre-guess the mid index) 105 | let idx_step: usize = arr.len() / nb_bins; 106 | 107 | // 2. The moving index & value 108 | let arr0: f64 = arr[0].as_(); // The first value of the array 109 | let mut idx: usize = 0; // Index of the search value 110 | 111 | // 3. Iterate over the bins 112 | (0..nb_bins).map(move |i| { 113 | let start_idx: usize = idx; // Start index of the bin (previous end index) 114 | 115 | // Update the search value 116 | let search_value: T = 117 | T::from_f64(sequential_add_mul(arr0, val_step, i + 1, EPSILON)).unwrap(); 118 | if arr[start_idx] >= search_value { 119 | // If the first value of the bin is already >= the search value, 120 | // then the bin is empty. 121 | return None; 122 | } 123 | // Update the pre-guess index 124 | let mid: usize = std::cmp::min(idx + idx_step, arr.len() - 2); 125 | // TODO: Implementation WITHOUT pre-guessing mid is slower!! 126 | idx = binary_search_with_mid(arr, search_value, idx, arr.len() - 1, mid); // End index of the bin 127 | Some((start_idx, idx)) 128 | }) 129 | } 130 | 131 | // --- Parallel version 132 | 133 | pub(crate) fn get_equidistant_bin_idx_iterator_parallel( 134 | arr: &[T], 135 | nb_bins: usize, 136 | ) -> impl IndexedParallelIterator> + '_> + '_ 137 | where 138 | T: Num + FromPrimitive + AsPrimitive + Sync + Send, 139 | { 140 | assert!(nb_bins >= 2); 141 | // 1. Compute the step between each bin 142 | // Divide by nb_bins to avoid overflow! 143 | let val_step: f64 = 144 | (arr[arr.len() - 1].as_() / nb_bins as f64) - (arr[0].as_() / nb_bins as f64); 145 | let arr0: f64 = arr[0].as_(); // The first value of the array 146 | 147 | // 2. Compute the number of threads & bins per thread 148 | let n_threads = std::cmp::min(POOL.current_num_threads(), nb_bins); 149 | let nb_bins_per_thread = nb_bins / n_threads; 150 | let nb_bins_last_thread = nb_bins - nb_bins_per_thread * (n_threads - 1); 151 | 152 | // 3. Iterate over the number of threads 153 | // -> for each thread perform the binary search sorted with moving left and 154 | // yield the indices (using the same idea as for the sequential version) 155 | (0..n_threads).into_par_iter().map(move |i| { 156 | // The moving index & value (for the thread) 157 | let arr0_thr: f64 = sequential_add_mul(arr0, val_step, i * nb_bins_per_thread, EPSILON); // Search value 158 | let start_value: T = T::from_f64(arr0_thr).unwrap(); 159 | // Search the start of the fist bin (of the thread) 160 | let mut idx: usize = 0; // Index of the search value 161 | if i > 0 { 162 | idx = binary_search(arr, start_value, 0, arr.len() - 1); 163 | } 164 | 165 | // The number of bins for the thread 166 | let nb_bins_thread = if i == n_threads - 1 { 167 | nb_bins_last_thread 168 | } else { 169 | nb_bins_per_thread 170 | }; 171 | // Perform sequential binary search for the end of the bins (of the thread) 172 | (0..nb_bins_thread).map(move |i| { 173 | let start_idx: usize = idx; // Start index of the bin (previous end index) 174 | 175 | // Update the search value 176 | let search_value: T = T::from_f64(arr0_thr + val_step * (i + 1) as f64).unwrap(); 177 | if arr[start_idx] >= search_value { 178 | // If the first value of the bin is already >= the search value, 179 | // then the bin is empty. 
180 | return None; 181 | } 182 | idx = binary_search(arr, search_value, idx, arr.len() - 1); // End index of the bin 183 | Some((start_idx, idx)) 184 | }) 185 | }) 186 | } 187 | 188 | // --------------------------------------- TESTS --------------------------------------- 189 | 190 | #[cfg(test)] 191 | mod tests { 192 | use rstest::rstest; 193 | use rstest_reuse::{self, *}; 194 | 195 | use super::*; 196 | 197 | use dev_utils::utils::get_random_array; 198 | 199 | // Template for nb_bins 200 | #[template] 201 | #[rstest] 202 | #[case(99)] 203 | #[case(100)] 204 | #[case(101)] 205 | fn nb_bins(#[case] nb_bins: usize) {} 206 | 207 | #[test] 208 | fn test_sequential_add_mul() { 209 | assert_eq!(sequential_add_mul(0.0, 1.0, 0, 0.0), 0.0); 210 | assert_eq!(sequential_add_mul(-1.0, 1.0, 1, 0.0), 0.0); 211 | assert_eq!(sequential_add_mul(-1.0, 1.0, 1, EPSILON), EPSILON); 212 | // Really large values 213 | assert_eq!(sequential_add_mul(0.0, 1.0, 1_000_000, 0.0), 1_000_000.0); 214 | assert!(sequential_add_mul(f64::MIN, f64::MAX / 2.0, 3, 0.0) < f64::MAX,); 215 | // TODO: the next tests fails due to very minor precision error 216 | // -> however, this precision error is needed to avoid the issue with m4_with_x 217 | // assert_eq!( 218 | // sequential_add_mul(f64::MIN, f64::MAX / 2.0, 3, 0.0), 219 | // f64::MIN + f64::MAX / 2.0 + f64::MAX 220 | // ); 221 | } 222 | 223 | #[test] 224 | fn test_search_sorted_identicial_to_np_linspace_searchsorted() { 225 | // Create a 0..9999 array 226 | let arr: [u32; 10_000] = core::array::from_fn(|i| i.as_()); 227 | assert!(arr.len() == 10_000); 228 | let iterator = get_equidistant_bin_idx_iterator(&arr, 4); 229 | // Check the iterator 230 | let mut idx: usize = 0; 231 | for bin in iterator { 232 | let (start_idx, end_idx) = bin.unwrap(); 233 | assert!(start_idx == idx); 234 | assert!(end_idx == idx + 2_500); 235 | idx += 2_500; 236 | } 237 | } 238 | 239 | #[test] 240 | fn test_binary_search() { 241 | let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 242 | assert_eq!(binary_search(&arr, 0, 0, arr.len() - 1), 0); 243 | assert_eq!(binary_search(&arr, 1, 0, arr.len() - 1), 1); 244 | assert_eq!(binary_search(&arr, 2, 0, arr.len() - 1), 2); 245 | assert_eq!(binary_search(&arr, 3, 0, arr.len() - 1), 3); 246 | assert_eq!(binary_search(&arr, 4, 0, arr.len() - 1), 4); 247 | assert_eq!(binary_search(&arr, 5, 0, arr.len() - 1), 5); 248 | assert_eq!(binary_search(&arr, 6, 0, arr.len() - 1), 6); 249 | assert_eq!(binary_search(&arr, 7, 0, arr.len() - 1), 7); 250 | assert_eq!(binary_search(&arr, 8, 0, arr.len() - 1), 8); 251 | assert_eq!(binary_search(&arr, 9, 0, arr.len() - 1), 9); 252 | assert_eq!(binary_search(&arr, 10, 0, arr.len() - 1), 10); 253 | assert_eq!(binary_search(&arr, 11, 0, arr.len() - 1), 10); 254 | } 255 | 256 | #[test] 257 | fn test_binary_search_with_mid() { 258 | let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 259 | assert_eq!(binary_search_with_mid(&arr, 0, 0, arr.len() - 1, 0), 0); 260 | assert_eq!(binary_search_with_mid(&arr, 1, 0, arr.len() - 1, 0), 1); 261 | assert_eq!(binary_search_with_mid(&arr, 2, 0, arr.len() - 1, 1), 2); 262 | assert_eq!(binary_search_with_mid(&arr, 3, 0, arr.len() - 1, 2), 3); 263 | assert_eq!(binary_search_with_mid(&arr, 4, 0, arr.len() - 1, 3), 4); 264 | assert_eq!(binary_search_with_mid(&arr, 5, 0, arr.len() - 1, 4), 5); 265 | assert_eq!(binary_search_with_mid(&arr, 6, 0, arr.len() - 1, 5), 6); 266 | assert_eq!(binary_search_with_mid(&arr, 7, 0, arr.len() - 1, 6), 7); 267 | assert_eq!(binary_search_with_mid(&arr, 8, 0, arr.len() - 1, 7), 8); 
268 | assert_eq!(binary_search_with_mid(&arr, 9, 0, arr.len() - 1, 8), 9); 269 | assert_eq!(binary_search_with_mid(&arr, 10, 0, arr.len() - 1, 9), 10); 270 | // this line causes the code to crash -> because value higher than arr[mid] 271 | // assert_eq!(binary_search_with_mid(&arr, 11, 0, arr.len() - 1, 9), 10); 272 | } 273 | 274 | #[test] 275 | fn test_get_equidistant_bin_idxs() { 276 | let expected_indices = vec![0, 4, 7]; 277 | 278 | let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 279 | let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr, 3); 280 | let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::>(); 281 | assert_eq!(bin_idxs, expected_indices); 282 | 283 | let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(&arr, 3); 284 | let bin_idxs = bin_idxs_iter 285 | .map(|x| x.map(|x| x.unwrap().0).collect::>()) 286 | .flatten() 287 | .collect::>(); 288 | assert_eq!(bin_idxs, expected_indices); 289 | } 290 | 291 | #[apply(nb_bins)] 292 | fn test_many_random_same_result(nb_bins: usize) { 293 | let n = 5_000; 294 | 295 | for _ in 0..100 { 296 | let mut arr = get_random_array::(n, i32::MIN, i32::MAX); 297 | // Sort the array 298 | arr.sort_by(|a, b| a.partial_cmp(b).unwrap()); 299 | 300 | // Calculate the bin indexes 301 | let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr[..], nb_bins); 302 | let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::>(); 303 | 304 | // Calculate the bin indexes in parallel 305 | let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(&arr[..], nb_bins); 306 | let bin_idxs_parallel = bin_idxs_iter 307 | .map(|x| x.map(|x| x.unwrap().0).collect::>()) 308 | .flatten() 309 | .collect::>(); 310 | 311 | // Check that the results are the same 312 | assert_eq!(bin_idxs, bin_idxs_parallel); 313 | } 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /tests/test_tsdownsample.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | 3 | import numpy as np 4 | import pytest 5 | from test_config import supported_dtypes_x, supported_dtypes_y 6 | 7 | from tsdownsample import ( # MeanDownsampler,; MedianDownsampler, 8 | EveryNthDownsampler, 9 | LTTBDownsampler, 10 | M4Downsampler, 11 | MinMaxDownsampler, 12 | MinMaxLTTBDownsampler, 13 | NaNM4Downsampler, 14 | NaNMinMaxDownsampler, 15 | NaNMinMaxLTTBDownsampler, 16 | ) 17 | from tsdownsample.downsampling_interface import ( 18 | AbstractDownsampler, 19 | AbstractRustNaNDownsampler, 20 | ) 21 | 22 | # TODO: Improve tests 23 | # - compare implementations with existing plotly_resampler implementations 24 | 25 | 26 | RUST_DOWNSAMPLERS = [ 27 | MinMaxDownsampler(), 28 | M4Downsampler(), 29 | LTTBDownsampler(), 30 | MinMaxLTTBDownsampler(), 31 | ] 32 | 33 | RUST_NAN_DOWNSAMPLERS = [ 34 | NaNMinMaxDownsampler(), 35 | NaNM4Downsampler(), 36 | NaNMinMaxLTTBDownsampler(), 37 | ] 38 | 39 | OTHER_DOWNSAMPLERS = [EveryNthDownsampler()] 40 | 41 | 42 | def generate_rust_downsamplers() -> Iterable[AbstractDownsampler]: 43 | for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS: 44 | yield downsampler 45 | 46 | 47 | def generate_rust_nan_downsamplers() -> Iterable[AbstractDownsampler]: 48 | for downsampler in RUST_NAN_DOWNSAMPLERS: 49 | yield downsampler 50 | 51 | 52 | def generate_all_downsamplers() -> Iterable[AbstractDownsampler]: 53 | for downsampler in RUST_DOWNSAMPLERS + RUST_NAN_DOWNSAMPLERS + OTHER_DOWNSAMPLERS: 54 | yield downsampler 55 | 56 | 57 | def generate_datapoints(): 58 | 
N_DATAPOINTS = 10_000 59 | return np.arange(N_DATAPOINTS) 60 | 61 | 62 | def generate_nan_datapoints(): 63 | N_DATAPOINTS = 10_000 64 | datapoints = np.arange(N_DATAPOINTS, dtype=np.float64) 65 | datapoints[0] = np.nan 66 | datapoints[9960] = np.nan 67 | return datapoints 68 | 69 | 70 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 71 | def test_serialization_copy(downsampler: AbstractDownsampler): 72 | """Test serialization.""" 73 | from copy import copy, deepcopy 74 | 75 | dc = copy(downsampler) 76 | ddc = deepcopy(downsampler) 77 | 78 | arr = generate_datapoints() 79 | 80 | orig_downsampled = downsampler.downsample(arr, n_out=100) 81 | dc_downsampled = dc.downsample(arr, n_out=100) 82 | ddc_downsampled = ddc.downsample(arr, n_out=100) 83 | assert np.all(orig_downsampled == dc_downsampled) 84 | assert np.all(orig_downsampled == ddc_downsampled) 85 | 86 | 87 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 88 | def test_serialization_pickle(downsampler: AbstractDownsampler): 89 | """Test serialization.""" 90 | import pickle 91 | 92 | dc = pickle.loads(pickle.dumps(downsampler)) 93 | 94 | arr = generate_datapoints() 95 | orig_downsampled = downsampler.downsample(arr, n_out=100) 96 | dc_downsampled = dc.downsample(arr, n_out=100) 97 | assert np.all(orig_downsampled == dc_downsampled) 98 | 99 | 100 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 101 | def test_rust_downsampler(downsampler: AbstractDownsampler): 102 | """Test the Rust downsamplers.""" 103 | arr = generate_datapoints() 104 | s_downsampled = downsampler.downsample(arr, n_out=100) 105 | assert s_downsampled[0] == 0 106 | assert s_downsampled[-1] == len(arr) - 1 107 | 108 | 109 | @pytest.mark.parametrize("downsampler", generate_rust_nan_downsamplers()) 110 | def test_rust_nan_downsampler(downsampler: AbstractRustNaNDownsampler): 111 | """Test the Rust NaN downsamplers.""" 112 | datapoints = generate_nan_datapoints() 113 | s_downsampled = downsampler.downsample(datapoints, n_out=100) 114 | print(s_downsampled) 115 | assert s_downsampled[0] == 0 116 | assert s_downsampled[-2] == 9960 117 | assert s_downsampled[50] != np.nan 118 | 119 | 120 | def test_everynth_downsampler(): 121 | """Test EveryNth downsampler.""" 122 | arr = np.arange(10_000) 123 | downsampler = EveryNthDownsampler() 124 | s_downsampled = downsampler.downsample(arr, n_out=100) 125 | assert s_downsampled[0] == 0 126 | assert s_downsampled[-1] == 9_900 127 | 128 | 129 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 130 | def test_parallel_downsampling(downsampler: AbstractDownsampler): 131 | """Test parallel downsampling.""" 132 | arr = np.random.randn(10_000).astype(np.float32) 133 | s_downsampled = downsampler.downsample(arr, n_out=100) 134 | s_downsampled_p = downsampler.downsample(arr, n_out=100, parallel=True) 135 | assert np.all(s_downsampled == s_downsampled_p) 136 | 137 | 138 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 139 | def test_parallel_downsampling_with_x(downsampler: AbstractDownsampler): 140 | """Test parallel downsampling with x.""" 141 | arr = np.random.randn(10_001).astype(np.float32) # 10_001 to test edge case 142 | idx = np.arange(len(arr)) 143 | s_downsampled = downsampler.downsample(idx, arr, n_out=100) 144 | s_downsampled_p = downsampler.downsample(idx, arr, n_out=100, parallel=True) 145 | assert np.all(s_downsampled == s_downsampled_p) 146 | 147 | 148 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 
149 | def test_downsampling_with_x(downsampler: AbstractDownsampler): 150 | """Test downsampling with x.""" 151 | arr = np.random.randn(2_001).astype(np.float32) # 2_001 to test edge case 152 | idx = np.arange(len(arr)) 153 | s_downsampled = downsampler.downsample(arr, n_out=100) 154 | s_downsampled_x = downsampler.downsample(idx, arr, n_out=100) 155 | assert np.all(s_downsampled == s_downsampled_x) 156 | 157 | 158 | @pytest.mark.parametrize("downsampler", generate_all_downsamplers()) 159 | def test_downsampling_with_gaps_in_x(downsampler: AbstractDownsampler): 160 | """Test downsampling with gaps in x. 161 | 162 | With gap we do NOT mean a NaN in the array, but a large gap in the x values. 163 | """ 164 | # TODO: might improve this test, now we just validate that the code does 165 | # not crash 166 | arr = np.random.randn(10_000).astype(np.float32) 167 | idx = np.arange(len(arr)) 168 | idx[: len(idx) // 2] += len(idx) // 2 # add large gap in x 169 | s_downsampled = downsampler.downsample(idx, arr, n_out=100) 170 | assert len(s_downsampled) <= 100 171 | assert len(s_downsampled) >= 66 172 | 173 | 174 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 175 | def test_downsampling_different_dtypes(downsampler: AbstractDownsampler): 176 | """Test downsampling with different data types.""" 177 | arr_orig = np.random.randint(0, 100, size=10_000) 178 | res = [] 179 | for dtype_y in supported_dtypes_y: 180 | arr = arr_orig.astype(dtype_y) 181 | s_downsampled = downsampler.downsample(arr, n_out=100) 182 | if dtype_y is not np.bool_: 183 | res += [s_downsampled] 184 | for i in range(1, len(res)): 185 | assert np.all(res[0] == res[i]) 186 | 187 | 188 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 189 | def test_downsampling_different_dtypes_with_x(downsampler: AbstractDownsampler): 190 | """Test downsampling with x with different data types.""" 191 | arr_orig = np.random.randint(0, 100, size=10_000) 192 | idx_orig = np.arange(len(arr_orig)) 193 | for dtype_x in supported_dtypes_x: 194 | res = [] 195 | idx = idx_orig.astype(dtype_x) 196 | for dtype_y in supported_dtypes_y: 197 | arr = arr_orig.astype(dtype_y) 198 | s_downsampled = downsampler.downsample(idx, arr, n_out=100) 199 | if dtype_y is not np.bool_: 200 | res += [s_downsampled] 201 | for i in range(1, len(res)): 202 | assert np.all(res[0] == res[i]) 203 | 204 | 205 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 206 | def test_downsampling_no_out_of_bounds_different_dtypes( 207 | downsampler: AbstractDownsampler, 208 | ): 209 | """Test no out of bounds issues when downsampling with different data types.""" 210 | arr_orig = np.random.randint(0, 100, size=100) 211 | res = [] 212 | for dtype in supported_dtypes_y: 213 | arr = arr_orig.astype(dtype) 214 | s_downsampled = downsampler.downsample(arr, n_out=76) 215 | s_downsampled_p = downsampler.downsample(arr, n_out=76, parallel=True) 216 | assert np.all(s_downsampled == s_downsampled_p) 217 | if dtype is not np.bool_: 218 | res += [s_downsampled] 219 | for i in range(1, len(res)): 220 | assert np.all(res[0] == res[i]) 221 | 222 | 223 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 224 | def test_downsampling_no_out_of_bounds_different_dtypes_with_x( 225 | downsampler: AbstractDownsampler, 226 | ): 227 | """Test no out of bounds issues when downsampling with different data types.""" 228 | arr_orig = np.random.randint(0, 100, size=100) 229 | idx_orig = np.arange(len(arr_orig)) 230 | for dtype_x in 
supported_dtypes_x: 231 | res = [] 232 | idx = idx_orig.astype(dtype_x) 233 | for dtype_y in supported_dtypes_y: 234 | arr = arr_orig.astype(dtype_y) 235 | s_downsampled = downsampler.downsample(idx, arr, n_out=76) 236 | s_downsampled_p = downsampler.downsample(idx, arr, n_out=76, parallel=True) 237 | assert np.all(s_downsampled == s_downsampled_p) 238 | if dtype_y is not np.bool_: 239 | res += [s_downsampled] 240 | for i in range(1, len(res)): 241 | assert np.all(res[0] == res[i]) 242 | 243 | 244 | def test_lttb_no_overflow(): 245 | """Test no overflow when calculating average.""" 246 | ### THIS SHOULD NOT OVERFLOW & HAVE THE SAME RESULT 247 | arr_orig = np.array([2 * 10**5] * 10_000, dtype=np.float64) 248 | s_downsampled = LTTBDownsampler().downsample(arr_orig, n_out=100) 249 | arr = arr_orig.astype(np.float32) 250 | s_downsampled_f32 = LTTBDownsampler().downsample(arr, n_out=100) 251 | assert np.all(s_downsampled == s_downsampled_f32) 252 | ### THIS SHOULD OVERFLOW & THUS HAVE A DIFFERENT RESULT... 253 | # max float32 is 3.4028235 × 10^38 (so 2*10**38 is too big when adding 2 values) 254 | arr_orig = np.array([2 * 10**38] * 10_000, dtype=np.float64) 255 | s_downsampled = LTTBDownsampler().downsample(arr_orig, n_out=100) 256 | arr = arr_orig.astype(np.float32) 257 | s_downsampled_f32 = LTTBDownsampler().downsample(arr, n_out=100) 258 | assert not np.all(s_downsampled == s_downsampled_f32) # TODO :( 259 | # I will leave this test here, but as many (much larger) libraries do not 260 | # really account for this, I guess it is perhaps less of an issue than I 261 | # thought. In the end f32 MAX is 3.4028235 × 10^38 & f64 MAX is 262 | # 1.7976931348623157 × 10^308 => which is in the end quite a lot.. (and all 263 | # integer averages are handled using f64) - f32 is only used for f16 & f32 264 | # (just as in numpy).
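# --- Added illustration (hypothetical helper, not part of the original test suite) ---
# The divergence asserted above comes down to plain float32 saturation: adding two
# values of 2e38 already exceeds the float32 maximum (~3.4028235e38) and overflows to
# inf, while the same arithmetic in float64 stays comfortably finite.
def _illustrate_f32_average_overflow():
    a = np.float32(2e38)
    assert np.isinf(a + a)  # 4e38 > float32 MAX -> overflows to inf
    b = np.float64(2e38)
    assert np.isfinite((b + b) / 2)  # float64 MAX ~1.8e308 easily holds 4e38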
265 | 266 | 267 | def test_invalid_nout(): 268 | """Test invalid n_out.""" 269 | arr = np.random.randint(0, 100, size=10_000) 270 | with pytest.raises(ValueError): 271 | LTTBDownsampler().downsample(arr, n_out=-1) 272 | with pytest.raises(ValueError): 273 | # Should be even 274 | MinMaxDownsampler().downsample(arr, n_out=33) 275 | with pytest.raises(ValueError): 276 | # Should be multiple of 4 277 | M4Downsampler().downsample(arr, n_out=34) 278 | 279 | 280 | def test_error_unsupported_dtype(): 281 | """Test unsupported dtype.""" 282 | arr = np.random.randint(0, 100, size=10_000) 283 | arr = arr.astype("object") 284 | with pytest.raises(ValueError): 285 | MinMaxDownsampler().downsample(arr, n_out=100) 286 | 287 | 288 | def test_error_invalid_args(): 289 | """Test invalid arguments.""" 290 | arr = np.random.randint(0, 100, size=10_000) 291 | # No args 292 | with pytest.raises(ValueError) as e_msg: 293 | MinMaxDownsampler().downsample(n_out=100, parallel=True) 294 | assert "takes 1 or 2 positional arguments" in str(e_msg.value) 295 | # Too many args 296 | with pytest.raises(ValueError) as e_msg: 297 | MinMaxDownsampler().downsample(arr, arr, arr, n_out=100, parallel=True) 298 | assert "takes 1 or 2 positional arguments" in str(e_msg.value) 299 | # Invalid y 300 | with pytest.raises(ValueError) as e_msg: 301 | MinMaxDownsampler().downsample(arr.reshape(5, 2_000), n_out=100, parallel=True) 302 | assert "y must be 1D" in str(e_msg.value) 303 | # Invalid x 304 | with pytest.raises(ValueError) as e_msg: 305 | MinMaxDownsampler().downsample( 306 | arr.reshape(5, 2_000), arr, n_out=100, parallel=True 307 | ) 308 | assert "x must be 1D" in str(e_msg.value) 309 | # Invalid x and y (different length) 310 | with pytest.raises(ValueError) as e_msg: 311 | MinMaxDownsampler().downsample(arr, arr[:-1], n_out=100, parallel=True) 312 | assert "x and y must have the same length" in str(e_msg.value) 313 | 314 | 315 | @pytest.mark.parametrize("downsampler", generate_rust_downsamplers()) 316 | def test_non_contiguous_array(downsampler: AbstractDownsampler): 317 | """Test non contiguous array.""" 318 | arr = np.random.randint(0, 100, size=10_000).astype(np.float32) 319 | arr = arr[::2] 320 | assert not arr.flags["C_CONTIGUOUS"] 321 | with pytest.raises(ValueError) as e_msg: 322 | downsampler.downsample(arr, n_out=100) 323 | assert "must be contiguous" in str(e_msg.value) 324 | 325 | 326 | def test_everynth_non_contiguous_array(): 327 | """Test non contiguous array.""" 328 | arr = np.random.randint(0, 100, size=10_000) 329 | arr = arr[::2] 330 | assert not arr.flags["C_CONTIGUOUS"] 331 | downsampler = EveryNthDownsampler() 332 | s_downsampled = downsampler.downsample(arr, n_out=100) 333 | assert s_downsampled[0] == 0 334 | assert s_downsampled[-1] == 4950 335 | 336 | 337 | def test_nan_minmax_downsampler(): 338 | """Test NaN downsamplers.""" 339 | arr = np.random.randn(50_000) 340 | arr[::5] = np.nan 341 | s_downsampled = NaNMinMaxDownsampler().downsample(arr, n_out=100) 342 | arr_downsampled = arr[s_downsampled] 343 | assert np.all(np.isnan(arr_downsampled)) 344 | 345 | 346 | def test_nan_m4_downsampler(): 347 | """Test NaN downsamplers.""" 348 | arr = np.random.randn(50_000) 349 | arr[::5] = np.nan 350 | s_downsampled = NaNM4Downsampler().downsample(arr, n_out=100) 351 | arr_downsampled = arr[s_downsampled] 352 | assert np.all(np.isnan(arr_downsampled[1::4])) # min is NaN 353 | assert np.all(np.isnan(arr_downsampled[2::4])) # max is NaN 354 | 355 | 356 | def test_nan_minmaxlttb_downsampler(): 357 | """Test NaN 
downsamplers.""" 358 | arr = np.random.randn(50_000) 359 | arr[::5] = np.nan 360 | s_downsampled = NaNMinMaxLTTBDownsampler().downsample(arr, n_out=100) 361 | arr_downsampled = arr[s_downsampled] 362 | assert np.all(np.isnan(arr_downsampled[1:-1])) # first and last are not NaN 363 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use half::f16; 2 | 3 | use numpy::{IntoPyArray, PyArray1, PyReadonlyArray1}; 4 | use paste::paste; 5 | use pyo3::prelude::*; 6 | use pyo3::wrap_pymodule; 7 | 8 | /// ------------------------- MACROS ------------------------- 9 | 10 | // Create macros to avoid duplicate code for the various resample functions over the 11 | // different data types. 12 | 13 | // ----- Helper macros ----- 14 | 15 | // Without x-range 16 | 17 | macro_rules! _create_pyfunc_without_x { 18 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type:ty, $mod:ident) => { 19 | // Create the Python function 20 | #[pyfunction] 21 | fn $name<'py>( 22 | py: Python<'py>, 23 | y: PyReadonlyArray1<$type>, 24 | n_out: usize, 25 | ) -> Bound<'py, PyArray1> { 26 | let y = y.as_slice().unwrap(); 27 | let sampled_indices = $resample_mod::$resample_fn(y, n_out); 28 | sampled_indices.into_pyarray(py) 29 | } 30 | // Add the function to the module 31 | $mod.add_wrapped(wrap_pyfunction!($name))?; 32 | }; 33 | } 34 | 35 | macro_rules! _create_pyfunc_without_x_with_ratio { 36 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type:ty, $mod:ident) => { 37 | // Create the Python function 38 | #[pyfunction] 39 | fn $name<'py>( 40 | py: Python<'py>, 41 | y: PyReadonlyArray1<$type>, 42 | n_out: usize, 43 | ratio: usize, 44 | ) -> Bound<'py, PyArray1> { 45 | let y = y.as_slice().unwrap(); 46 | let sampled_indices = $resample_mod::$resample_fn(y, n_out, ratio); 47 | sampled_indices.into_pyarray(py) 48 | } 49 | // Add the function to the module 50 | $mod.add_wrapped(wrap_pyfunction!($name))?; 51 | }; 52 | } 53 | 54 | macro_rules! _create_pyfuncs_without_x_generic { 55 | ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)*) => { 56 | $( 57 | paste! { 58 | $create_macro!([], $resample_mod, $resample_fn, $t, $mod); 59 | } 60 | )* 61 | }; 62 | 63 | (@nan $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($t:ty)*) => { 64 | $( 65 | paste! { 66 | $create_macro!([], $resample_mod, $resample_fn, $t, $mod); 67 | } 68 | )* 69 | }; 70 | } 71 | 72 | // With x-range 73 | 74 | macro_rules! _create_pyfunc_with_x { 75 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type_x:ty, $type_y:ty, $mod:ident) => { 76 | // Create the Python function 77 | #[pyfunction] 78 | fn $name<'py>( 79 | py: Python<'py>, 80 | x: PyReadonlyArray1<$type_x>, 81 | y: PyReadonlyArray1<$type_y>, 82 | n_out: usize, 83 | ) -> Bound<'py, PyArray1> { 84 | let x = x.as_slice().unwrap(); 85 | let y = y.as_slice().unwrap(); 86 | let sampled_indices = $resample_mod::$resample_fn(x, y, n_out); 87 | sampled_indices.into_pyarray(py) 88 | } 89 | // Add the function to the module 90 | $mod.add_wrapped(wrap_pyfunction!($name))?; 91 | }; 92 | } 93 | 94 | macro_rules! 
_create_pyfunc_with_x_with_ratio { 95 | ($name:ident, $resample_mod:ident, $resample_fn:ident, $type_x:ty, $type_y:ty, $mod:ident) => { 96 | // Create the Python function 97 | #[pyfunction] 98 | fn $name<'py>( 99 | py: Python<'py>, 100 | x: PyReadonlyArray1<$type_x>, 101 | y: PyReadonlyArray1<$type_y>, 102 | n_out: usize, 103 | ratio: usize, 104 | ) -> Bound<'py, PyArray1> { 105 | let x = x.as_slice().unwrap(); 106 | let y = y.as_slice().unwrap(); 107 | let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, ratio); 108 | sampled_indices.into_pyarray(py) 109 | } 110 | // Add the function to the module 111 | $mod.add_wrapped(wrap_pyfunction!($name))?; 112 | }; 113 | } 114 | 115 | macro_rules! _create_pyfuncs_with_x_generic { 116 | 117 | ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+, $($ty:ty)+) => { 118 | // The macro will implement the function for all combinations of $tx and $ty (for respectively type x and y). 119 | _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty),+); 120 | }; 121 | 122 | // Base case: there is only one type (for y) left 123 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty:ty) => { 124 | $( 125 | paste! { 126 | $create_macro!([], $resample_mod, $resample_fn, $tx, $ty, $mod); 127 | } 128 | )* 129 | }; 130 | // The head/tail recursion: pick the first element -> apply the base case, and recurse over the rest. 131 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty_head:ty, $($ty_rest:ty),+) => { 132 | _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $ty_head); 133 | _create_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty_rest),+); 134 | }; 135 | 136 | // Huge thx to https://stackoverflow.com/a/54552848 137 | // and https://users.rust-lang.org/t/tail-recursive-macros/905/3 138 | } 139 | 140 | // TODO: there must be a better way to combine normal and nan macros 141 | macro_rules! _create_nan_pyfuncs_with_x_generic { 142 | 143 | ($create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+, $($ty:ty)+) => { 144 | // The macro will implement the function for all combinations of $tx and $ty (for respectively type x and y). 145 | _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty),+); 146 | }; 147 | 148 | // Base case: there is only one type (for y) left 149 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty:ty) => { 150 | $( 151 | paste! { 152 | $create_macro!([], $resample_mod, $resample_fn, $tx, $ty, $mod); 153 | } 154 | )* 155 | }; 156 | // The head/tail recursion: pick the first element -> apply the base case, and recurse over the rest. 157 | (@inner $create_macro:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident, $($tx:ty)+; $ty_head:ty, $($ty_rest:ty),+) => { 158 | _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $ty_head); 159 | _create_nan_pyfuncs_with_x_generic!(@inner $create_macro, $resample_mod, $resample_fn, $mod, $($tx)+; $($ty_rest),+); 160 | }; 161 | 162 | // Huge thx to https://stackoverflow.com/a/54552848 163 | // and https://users.rust-lang.org/t/tail-recursive-macros/905/3 164 | } 165 | // ------ Main macros ------ 166 | 167 | macro_rules! 
_create_pyfuncs_without_x_helper { 168 | ($pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 169 | _create_pyfuncs_without_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f16 f32 f64 i8 i16 i32 i64 u8 u16 u32 u64); 170 | }; 171 | 172 | (@nan $pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 173 | _create_pyfuncs_without_x_generic!(@nan $pyfunc_fn, $resample_mod, $resample_fn, $mod, f16 f32 f64); 174 | }; 175 | } 176 | 177 | macro_rules! create_pyfuncs_without_x { 178 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 179 | _create_pyfuncs_without_x_helper!( 180 | _create_pyfunc_without_x, 181 | $resample_mod, 182 | $resample_fn, 183 | $mod 184 | ); 185 | }; 186 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 187 | _create_pyfuncs_without_x_helper!(@nan 188 | _create_pyfunc_without_x, 189 | $resample_mod, 190 | $resample_fn, 191 | $mod 192 | ); 193 | }; 194 | } 195 | 196 | macro_rules! create_pyfuncs_without_x_with_ratio { 197 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 198 | _create_pyfuncs_without_x_helper!( 199 | _create_pyfunc_without_x_with_ratio, 200 | $resample_mod, 201 | $resample_fn, 202 | $mod 203 | ); 204 | }; 205 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 206 | _create_pyfuncs_without_x_helper!(@nan 207 | _create_pyfunc_without_x_with_ratio, 208 | $resample_mod, 209 | $resample_fn, 210 | $mod 211 | ); 212 | }; 213 | } 214 | 215 | macro_rules! _create_pyfuncs_with_x_helper { 216 | ($pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 217 | _create_pyfuncs_with_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f32 f64 i16 i32 i64 u16 u32 u64, f16 f32 f64 i8 i16 i32 i64 u8 u16 u32 u64); 218 | }; 219 | (@nan $pyfunc_fn:ident, $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 220 | _create_nan_pyfuncs_with_x_generic!($pyfunc_fn, $resample_mod, $resample_fn, $mod, f32 f64 i16 i32 i64 u16 u32 u64, f16 f32 f64); 221 | }; 222 | } 223 | 224 | macro_rules! create_pyfuncs_with_x { 225 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 226 | _create_pyfuncs_with_x_helper!(_create_pyfunc_with_x, $resample_mod, $resample_fn, $mod); 227 | }; 228 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 229 | _create_pyfuncs_with_x_helper!(@nan _create_pyfunc_with_x, $resample_mod, $resample_fn, $mod); 230 | }; 231 | } 232 | 233 | macro_rules! 
create_pyfuncs_with_x_with_ratio { 234 | ($resample_mod:ident, $resample_fn:ident, $mod:ident) => { 235 | _create_pyfuncs_with_x_helper!( 236 | _create_pyfunc_with_x_with_ratio, 237 | $resample_mod, 238 | $resample_fn, 239 | $mod 240 | ); 241 | }; 242 | (@nan $resample_mod:ident, $resample_fn:ident, $mod:ident) => { 243 | _create_pyfuncs_with_x_helper!(@nan 244 | _create_pyfunc_with_x_with_ratio, 245 | $resample_mod, 246 | $resample_fn, 247 | $mod 248 | ); 249 | }; 250 | } 251 | 252 | // -------------------------------------- MINMAX --------------------------------------- 253 | 254 | use downsample_rs::minmax as minmax_mod; 255 | 256 | // Create a sub module for the minmax algorithm 257 | #[pymodule] 258 | fn minmax(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { 259 | // ----------------- SEQUENTIAL 260 | 261 | let sequential_mod = PyModule::new(_py, "sequential")?; 262 | 263 | // ----- WITHOUT X 264 | { 265 | create_pyfuncs_without_x!(minmax_mod, min_max_without_x, sequential_mod); 266 | create_pyfuncs_without_x!(@nan minmax_mod, min_max_without_x_nan, sequential_mod); 267 | } 268 | 269 | // ----- WITH X 270 | { 271 | create_pyfuncs_with_x!(minmax_mod, min_max_with_x, sequential_mod); 272 | create_pyfuncs_with_x!(@nan minmax_mod, min_max_with_x_nan, sequential_mod); 273 | } 274 | 275 | // ----------------- PARALLEL 276 | 277 | let parallel_mod = PyModule::new(_py, "parallel")?; 278 | 279 | // ----- WITHOUT X 280 | { 281 | create_pyfuncs_without_x!(minmax_mod, min_max_without_x_parallel, parallel_mod); 282 | create_pyfuncs_without_x!(@nan minmax_mod, min_max_without_x_parallel, parallel_mod); 283 | } 284 | 285 | // ----- WITH X 286 | { 287 | create_pyfuncs_with_x!(minmax_mod, min_max_with_x_parallel, parallel_mod); 288 | create_pyfuncs_with_x!(@nan minmax_mod, min_max_with_x_parallel, parallel_mod); 289 | } 290 | 291 | // Add the sub modules to the module 292 | m.add_submodule(&sequential_mod)?; 293 | m.add_submodule(¶llel_mod)?; 294 | 295 | Ok(()) 296 | } 297 | 298 | // --------------------------------------- M4 ------------------------------------------ 299 | 300 | use downsample_rs::m4 as m4_mod; 301 | 302 | // Create a sub module for the M4 algorithm 303 | #[pymodule] 304 | fn m4(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 305 | // ----------------- SEQUENTIAL 306 | 307 | let sequential_mod = PyModule::new(_py, "sequential")?; 308 | 309 | // ----- WITHOUT X 310 | { 311 | create_pyfuncs_without_x!(m4_mod, m4_without_x, sequential_mod); 312 | create_pyfuncs_without_x!(@nan m4_mod, m4_without_x_nan, sequential_mod); 313 | } 314 | 315 | // ----- WITH X 316 | { 317 | create_pyfuncs_with_x!(m4_mod, m4_with_x, sequential_mod); 318 | create_pyfuncs_with_x!(@nan m4_mod, m4_with_x_nan, sequential_mod); 319 | } 320 | 321 | // ----------------- PARALLEL 322 | 323 | let parallel_mod = PyModule::new(_py, "parallel")?; 324 | 325 | // ----- WITHOUT X 326 | { 327 | create_pyfuncs_without_x!(m4_mod, m4_without_x_parallel, parallel_mod); 328 | create_pyfuncs_without_x!(@nan m4_mod, m4_without_x_parallel, parallel_mod); 329 | } 330 | 331 | // ----- WITH X 332 | { 333 | create_pyfuncs_with_x!(m4_mod, m4_with_x_parallel, parallel_mod); 334 | create_pyfuncs_with_x!(@nan m4_mod, m4_with_x_parallel, parallel_mod); 335 | } 336 | 337 | // Add the sub modules to the module 338 | m.add_submodule(&sequential_mod)?; 339 | m.add_submodule(¶llel_mod)?; 340 | 341 | Ok(()) 342 | } 343 | 344 | // -------------------------------------- LTTB ----------------------------------------- 345 | 346 | use 
downsample_rs::lttb as lttb_mod; 347 | 348 | // Create a sub module for the LTTB algorithm 349 | #[pymodule] 350 | fn lttb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 351 | // ----------------- SEQUENTIAL 352 | 353 | let sequential_mod = PyModule::new(_py, "sequential")?; 354 | 355 | // Create the Python functions for the module 356 | // ----- WITHOUT X 357 | { 358 | create_pyfuncs_without_x!(lttb_mod, lttb_without_x, sequential_mod); 359 | } 360 | 361 | // ----- WITH X 362 | { 363 | create_pyfuncs_with_x!(lttb_mod, lttb_with_x, sequential_mod); 364 | } 365 | 366 | // Add the sub modules to the module 367 | m.add_submodule(&sequential_mod)?; 368 | 369 | Ok(()) 370 | } 371 | 372 | // -------------------------------------- MINMAXLTTB ----------------------------------------- 373 | 374 | use downsample_rs::minmaxlttb as minmaxlttb_mod; 375 | 376 | // Create a sub module for the MINMAXLTTB algorithm 377 | #[pymodule] 378 | fn minmaxlttb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 379 | // ----------------- SEQUENTIAL 380 | 381 | let sequential_mod = PyModule::new(_py, "sequential")?; 382 | 383 | // ----- WITHOUT X 384 | { 385 | create_pyfuncs_without_x_with_ratio!(minmaxlttb_mod, minmaxlttb_without_x, sequential_mod); 386 | create_pyfuncs_without_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_without_x_nan, sequential_mod); 387 | } 388 | 389 | // ----- WITH X 390 | { 391 | create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_with_x, sequential_mod); 392 | create_pyfuncs_with_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_with_x_nan, sequential_mod); 393 | } 394 | 395 | // ----------------- PARALLEL 396 | 397 | let parallel_mod = PyModule::new(_py, "parallel")?; 398 | 399 | // ----- WITHOUT X 400 | { 401 | create_pyfuncs_without_x_with_ratio!( 402 | minmaxlttb_mod, 403 | minmaxlttb_without_x_parallel, 404 | parallel_mod 405 | ); 406 | create_pyfuncs_without_x_with_ratio!(@nan 407 | minmaxlttb_mod, 408 | minmaxlttb_without_x_parallel, 409 | parallel_mod 410 | ); 411 | } 412 | 413 | // ----- WITH X 414 | { 415 | create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_with_x_parallel, parallel_mod); 416 | create_pyfuncs_with_x_with_ratio!(@nan minmaxlttb_mod, minmaxlttb_with_x_parallel, parallel_mod); 417 | } 418 | 419 | // Add the submodules to the module 420 | m.add_submodule(&sequential_mod)?; 421 | m.add_submodule(¶llel_mod)?; 422 | 423 | Ok(()) 424 | } 425 | 426 | // ------------------------------- DOWNSAMPLING MODULE ------------------------------ // 427 | 428 | #[pymodule] // The super module 429 | #[pyo3(name = "_tsdownsample_rs")] // How the module is imported in Python: https://github.com/PyO3/maturin/issues/256#issuecomment-1038576218 430 | fn tsdownsample(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { 431 | m.add_wrapped(wrap_pymodule!(minmax))?; 432 | m.add_wrapped(wrap_pymodule!(m4))?; 433 | m.add_wrapped(wrap_pymodule!(lttb))?; 434 | m.add_wrapped(wrap_pymodule!(minmaxlttb))?; 435 | 436 | Ok(()) 437 | } 438 | -------------------------------------------------------------------------------- /downsample_rs/src/minmax.rs: -------------------------------------------------------------------------------- 1 | use rayon::iter::IndexedParallelIterator; 2 | use rayon::prelude::*; 3 | 4 | use argminmax::{ArgMinMax, NaNArgMinMax}; 5 | use num_traits::{AsPrimitive, FromPrimitive}; 6 | 7 | use super::searchsorted::{ 8 | get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, 9 | }; 10 | use super::types::Num; 11 | use 
super::POOL; 12 | 13 | // ----------------------------------- NON-PARALLEL ------------------------------------ 14 | 15 | // ----------- WITH X 16 | 17 | macro_rules! min_max_with_x { 18 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 19 | pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec 20 | where 21 | for<'a> &'a [Ty]: $trait, 22 | Tx: Num + FromPrimitive + AsPrimitive, 23 | Ty: Copy + PartialOrd, 24 | { 25 | assert_eq!(n_out % 2, 0); 26 | let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); 27 | min_max_generic_with_x(arr, bin_idx_iterator, n_out, $f_argminmax) 28 | } 29 | }; 30 | } 31 | 32 | min_max_with_x!(min_max_with_x, ArgMinMax, |arr| arr.argminmax()); 33 | min_max_with_x!(min_max_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax()); 34 | 35 | // ----------- WITHOUT X 36 | 37 | macro_rules! min_max_without_x { 38 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 39 | pub fn $func_name(arr: &[T], n_out: usize) -> Vec 40 | where 41 | for<'a> &'a [T]: $trait, 42 | { 43 | assert_eq!(n_out % 2, 0); 44 | min_max_generic(arr, n_out, $f_argminmax) 45 | } 46 | }; 47 | } 48 | 49 | min_max_without_x!(min_max_without_x, ArgMinMax, |arr| arr.argminmax()); 50 | min_max_without_x!(min_max_without_x_nan, NaNArgMinMax, |arr| arr 51 | .nanargminmax()); 52 | 53 | // ------------------------------------- PARALLEL -------------------------------------- 54 | 55 | // ----------- WITH X 56 | 57 | macro_rules! min_max_with_x_parallel { 58 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 59 | pub fn $func_name(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec 60 | where 61 | for<'a> &'a [Ty]: $trait, 62 | Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, 63 | Ty: Copy + PartialOrd + Send + Sync, 64 | { 65 | assert_eq!(n_out % 2, 0); 66 | let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2); 67 | min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, $f_argminmax) 68 | } 69 | }; 70 | } 71 | 72 | min_max_with_x_parallel!(min_max_with_x_parallel, ArgMinMax, |arr| arr.argminmax()); 73 | min_max_with_x_parallel!(min_max_with_x_parallel_nan, NaNArgMinMax, |arr| arr 74 | .nanargminmax()); 75 | 76 | // ----------- WITHOUT X 77 | 78 | macro_rules! 
min_max_without_x_parallel { 79 | ($func_name:ident, $trait:path, $f_argminmax:expr) => { 80 | pub fn $func_name(arr: &[T], n_out: usize) -> Vec 81 | where 82 | for<'a> &'a [T]: $trait, 83 | { 84 | assert_eq!(n_out % 2, 0); 85 | min_max_generic_parallel(arr, n_out, $f_argminmax) 86 | } 87 | }; 88 | } 89 | 90 | min_max_without_x_parallel!(min_max_without_x_parallel, ArgMinMax, |arr| arr.argminmax()); 91 | min_max_without_x_parallel!(min_max_without_x_parallel_nan, NaNArgMinMax, |arr| arr 92 | .nanargminmax()); 93 | 94 | // ----------------------------------- GENERICS ------------------------------------ 95 | 96 | // --------------------- WITHOUT X 97 | 98 | #[inline(always)] 99 | pub(crate) fn min_max_generic( 100 | arr: &[T], 101 | n_out: usize, 102 | f_argminmax: fn(&[T]) -> (usize, usize), 103 | ) -> Vec { 104 | // Assumes n_out is a multiple of 2 105 | if n_out >= arr.len() { 106 | return (0..arr.len()).collect::>(); 107 | } 108 | 109 | // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) 110 | let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; 111 | 112 | let mut sampled_indices = vec![usize::default(); n_out]; 113 | 114 | let mut start_idx: usize = 0; 115 | for i in 0..n_out / 2 { 116 | // Decided to use multiplication instead of adding to the accumulator (end) 117 | // as multiplication seems to be less prone to rounding errors. 118 | let end: f64 = block_size * (i + 1) as f64; 119 | let end_idx: usize = end as usize + 1; 120 | 121 | let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); 122 | 123 | // Add the indexes in sorted order 124 | if min_index < max_index { 125 | sampled_indices[2 * i] = min_index + start_idx; 126 | sampled_indices[2 * i + 1] = max_index + start_idx; 127 | } else { 128 | sampled_indices[2 * i] = max_index + start_idx; 129 | sampled_indices[2 * i + 1] = min_index + start_idx; 130 | } 131 | 132 | start_idx = end_idx; 133 | } 134 | 135 | sampled_indices 136 | } 137 | 138 | #[inline(always)] 139 | pub(crate) fn min_max_generic_parallel( 140 | arr: &[T], 141 | n_out: usize, 142 | f_argminmax: fn(&[T]) -> (usize, usize), 143 | ) -> Vec { 144 | // Assumes n_out is a multiple of 2 145 | if n_out >= arr.len() { 146 | return (0..arr.len()).collect::>(); 147 | } 148 | 149 | // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) 150 | let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; 151 | 152 | // Store the enumerated indexes in the output array 153 | // These indexes are used to calculate the start and end indexes of each bin in 154 | // the multi-threaded execution 155 | let mut sampled_indices: Vec = (0..n_out).collect::>(); 156 | 157 | POOL.install(|| { 158 | sampled_indices 159 | .par_chunks_exact_mut(2) 160 | .for_each(|sampled_index_chunk| { 161 | let i: f64 = unsafe { *sampled_index_chunk.get_unchecked(0) >> 1 } as f64; 162 | let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize; 163 | let end_idx: usize = (block_size * (i + 1.0)) as usize + 1; 164 | 165 | let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); 166 | 167 | // Add the indexes in sorted order 168 | if min_index < max_index { 169 | sampled_index_chunk[0] = min_index + start_idx; 170 | sampled_index_chunk[1] = max_index + start_idx; 171 | } else { 172 | sampled_index_chunk[0] = max_index + start_idx; 173 | sampled_index_chunk[1] = min_index + start_idx; 174 | } 175 | }) 176 | }); 177 | 178 | sampled_indices 179 | } 180 | 181 | // --------------------- WITH X 182 | 183 | 
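// (Added note, not in the original source) The `_with_x` generics below consume the
// bins as an iterator of `Option<(start, end)>` index ranges derived from the x-values
// (see searchsorted.rs): bins are equidistant in x rather than in index, `None` marks
// an empty bin (e.g. a gap in x), and bins holding fewer than three samples are pushed
// as-is instead of being reduced to an (argmin, argmax) pair.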
#[inline(always)] 184 | pub(crate) fn min_max_generic_with_x( 185 | arr: &[T], 186 | bin_idx_iterator: impl Iterator>, 187 | n_out: usize, 188 | f_argminmax: fn(&[T]) -> (usize, usize), 189 | ) -> Vec { 190 | // Assumes n_out is a multiple of 2 191 | if n_out >= arr.len() { 192 | return (0..arr.len()).collect::>(); 193 | } 194 | 195 | let mut sampled_indices: Vec = Vec::with_capacity(n_out); 196 | 197 | bin_idx_iterator.for_each(|bin| { 198 | if let Some((start, end)) = bin { 199 | if end <= start + 2 { 200 | // If the bin has <= 2 elements, just add them all 201 | for i in start..end { 202 | sampled_indices.push(i); 203 | } 204 | } else { 205 | // If the bin has at least two elements, add the argmin and argmax 206 | let step = &arr[start..end]; 207 | let (min_index, max_index) = f_argminmax(step); 208 | 209 | // Add the indexes in sorted order 210 | if min_index < max_index { 211 | sampled_indices.push(min_index + start); 212 | sampled_indices.push(max_index + start); 213 | } else { 214 | sampled_indices.push(max_index + start); 215 | sampled_indices.push(min_index + start); 216 | } 217 | } 218 | } 219 | }); 220 | 221 | sampled_indices 222 | } 223 | 224 | #[inline(always)] 225 | pub(crate) fn min_max_generic_with_x_parallel( 226 | arr: &[T], 227 | bin_idx_iterator: impl IndexedParallelIterator>>, 228 | n_out: usize, 229 | f_argminmax: fn(&[T]) -> (usize, usize), 230 | ) -> Vec { 231 | // Assumes n_out is a multiple of 2 232 | if n_out >= arr.len() { 233 | return (0..arr.len()).collect::>(); 234 | } 235 | 236 | POOL.install(|| { 237 | bin_idx_iterator 238 | .flat_map(|bin_idx_iterator| { 239 | bin_idx_iterator 240 | .map(|bin| { 241 | match bin { 242 | Some((start, end)) => { 243 | if end <= start + 2 { 244 | // If the bin has <= 2 elements, just return them all 245 | return (start..end).collect::>(); 246 | } 247 | 248 | // If the bin has at least two elements, return the argmin and argmax 249 | let step = &arr[start..end]; 250 | let (min_index, max_index) = f_argminmax(step); 251 | 252 | // Return the indexes in sorted order 253 | if min_index < max_index { 254 | vec![min_index + start, max_index + start] 255 | } else { 256 | vec![max_index + start, min_index + start] 257 | } 258 | } // If the bin is empty, return empty Vec 259 | None => { 260 | vec![] 261 | } 262 | } 263 | }) 264 | .collect::>>() 265 | }) 266 | .flatten() 267 | .collect::>() 268 | }) 269 | } 270 | 271 | #[cfg(test)] 272 | mod tests { 273 | use num_traits::AsPrimitive; 274 | use rstest::rstest; 275 | use rstest_reuse::{self, *}; 276 | 277 | use super::{min_max_with_x, min_max_without_x}; 278 | use super::{min_max_with_x_parallel, min_max_without_x_parallel}; 279 | 280 | use dev_utils::utils; 281 | 282 | fn get_array_f32(n: usize) -> Vec { 283 | utils::get_random_array(n, f32::MIN, f32::MAX) 284 | } 285 | 286 | // Template for n_out 287 | #[template] 288 | #[rstest] 289 | #[case(198)] 290 | #[case(200)] 291 | #[case(202)] 292 | fn n_outs(#[case] n_out: usize) {} 293 | 294 | #[test] 295 | fn test_min_max_scalar_without_x_correct() { 296 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 297 | 298 | let sampled_indices = min_max_without_x(&arr, 10); 299 | let sampled_values = sampled_indices 300 | .iter() 301 | .map(|x| arr[*x]) 302 | .collect::>(); 303 | 304 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 305 | let expected_values = expected_indices 306 | .iter() 307 | .map(|x| *x as f32) 308 | .collect::>(); 309 | 310 | assert_eq!(sampled_indices, expected_indices); 311 | 
assert_eq!(sampled_values, expected_values); 312 | } 313 | 314 | #[test] 315 | fn test_min_max_scalar_without_x_parallel_correct() { 316 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 317 | 318 | let sampled_indices = min_max_without_x_parallel(&arr, 10); 319 | let sampled_values = sampled_indices 320 | .iter() 321 | .map(|x| arr[*x]) 322 | .collect::>(); 323 | 324 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 325 | let expected_values = expected_indices 326 | .iter() 327 | .map(|x| *x as f32) 328 | .collect::>(); 329 | 330 | assert_eq!(sampled_indices, expected_indices); 331 | assert_eq!(sampled_values, expected_values); 332 | } 333 | 334 | #[test] 335 | fn test_min_max_scalar_with_x_correct() { 336 | let x: [i32; 100] = core::array::from_fn(|i| i.as_()); 337 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 338 | 339 | let sampled_indices = min_max_with_x(&x, &arr, 10); 340 | let sampled_values = sampled_indices 341 | .iter() 342 | .map(|x| arr[*x]) 343 | .collect::>(); 344 | 345 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 346 | let expected_values = expected_indices 347 | .iter() 348 | .map(|x| *x as f32) 349 | .collect::>(); 350 | 351 | assert_eq!(sampled_indices, expected_indices); 352 | assert_eq!(sampled_values, expected_values); 353 | } 354 | 355 | #[test] 356 | fn test_min_max_scalar_with_x_parallel_correct() { 357 | let x: [i32; 100] = core::array::from_fn(|i| i.as_()); 358 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 359 | 360 | let sampled_indices = min_max_with_x_parallel(&x, &arr, 10); 361 | let sampled_values = sampled_indices 362 | .iter() 363 | .map(|x| arr[*x]) 364 | .collect::>(); 365 | 366 | let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; 367 | let expected_values = expected_indices 368 | .iter() 369 | .map(|x| *x as f32) 370 | .collect::>(); 371 | 372 | assert_eq!(sampled_indices, expected_indices); 373 | assert_eq!(sampled_values, expected_values); 374 | } 375 | 376 | #[test] 377 | fn test_min_max_scalar_with_x_gap() { 378 | // We will create a gap in the middle of the array 379 | // Increment the second half of the array by 50 380 | let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); 381 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 382 | 383 | let sampled_indices = min_max_with_x(&x, &arr, 10); 384 | assert_eq!(sampled_indices.len(), 8); // One full gap 385 | let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; 386 | assert_eq!(sampled_indices, expected_indices); 387 | 388 | // Increment the second half of the array by 50 again 389 | let x = x.map(|i| if i > 101 { i + 50 } else { i }); 390 | 391 | let sampled_indices = min_max_with_x(&x, &arr, 10); 392 | assert_eq!(sampled_indices.len(), 9); // Gap with 1 value 393 | let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; 394 | assert_eq!(sampled_indices, expected_indices); 395 | } 396 | 397 | #[test] 398 | fn test_min_max_scalar_with_x_parallel_gap() { 399 | // Create a gap in the middle of the array 400 | // Increment the second half of the array by 50 401 | let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); 402 | let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); 403 | 404 | let sampled_indices = min_max_with_x_parallel(&x, &arr, 10); 405 | assert_eq!(sampled_indices.len(), 8); // One full gap 406 | let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; 407 | assert_eq!(sampled_indices, 
expected_indices); 408 | 409 | // Increment the second half of the array by 50 again 410 | let x = x.map(|i| if i > 101 { i + 50 } else { i }); 411 | 412 | let sampled_indices = min_max_with_x_parallel(&x, &arr, 10); 413 | assert_eq!(sampled_indices.len(), 9); // Gap with 1 value 414 | let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; 415 | assert_eq!(sampled_indices, expected_indices); 416 | } 417 | 418 | #[test] 419 | fn test_same_output() { 420 | const N: usize = 1001 - 2; 421 | const n_out: usize = 26 * 4; 422 | let y = (0..N).map(|v| v as f32).collect::>(); 423 | let x = (1..(N + 1) as i32).collect::>(); 424 | let sampled_indices1 = min_max_with_x(&x, &y, n_out); 425 | let sampled_indices2 = min_max_without_x(&y, n_out); 426 | assert_eq!(sampled_indices1, sampled_indices2); 427 | } 428 | 429 | #[apply(n_outs)] 430 | fn test_many_random_runs_same_output(n_out: usize) { 431 | const N: usize = 20_003; 432 | let x: [i32; N] = core::array::from_fn(|i| i.as_()); 433 | for _ in 0..100 { 434 | let mut arr = get_array_f32(N); 435 | arr[N - 1] = f32::INFINITY; // Make sure the last value is always the max 436 | let idxs1 = min_max_without_x(arr.as_slice(), n_out); 437 | let idxs2 = min_max_without_x_parallel(arr.as_slice(), n_out); 438 | let idxs3 = min_max_with_x(&x, arr.as_slice(), n_out); 439 | let idxs4 = min_max_with_x_parallel(&x, arr.as_slice(), n_out); 440 | assert_eq!(idxs1, idxs2); 441 | assert_eq!(idxs1, idxs3); 442 | assert_eq!(idxs1, idxs4); 443 | } 444 | } 445 | } 446 | -------------------------------------------------------------------------------- /tsdownsample/downsampling_interface.py: -------------------------------------------------------------------------------- 1 | """AbstractDownsampler interface-class, subclassed by concrete downsamplers.""" 2 | 3 | __author__ = "Jeroen Van Der Donckt" 4 | 5 | import re 6 | import warnings 7 | from abc import ABC, abstractmethod 8 | from copy import deepcopy 9 | from types import ModuleType 10 | from typing import Callable, List, Optional, Tuple, Union 11 | 12 | import numpy as np 13 | 14 | 15 | class AbstractDownsampler(ABC): 16 | """AbstractDownsampler interface-class, subclassed by concrete downsamplers.""" 17 | 18 | def __init__( 19 | self, 20 | check_contiguous: bool = True, 21 | x_dtype_regex_list: Optional[List[str]] = None, 22 | y_dtype_regex_list: Optional[List[str]] = None, 23 | ): 24 | self.check_contiguous = check_contiguous 25 | self.x_dtype_regex_list = x_dtype_regex_list 26 | self.y_dtype_regex_list = y_dtype_regex_list 27 | 28 | def _check_contiguous(self, arr: np.ndarray, y: bool = True): 29 | # necessary for rust downsamplers as they don't support non-contiguous arrays 30 | # (we call .as_slice().unwrap() on the array) in the lib.rs file 31 | # which will panic if the array is not contiguous 32 | if not self.check_contiguous: 33 | return 34 | 35 | if arr.flags["C_CONTIGUOUS"]: 36 | return 37 | 38 | raise ValueError(f"{'y' if y else 'x'} array must be contiguous.") 39 | 40 | def _supports_dtype(self, arr: np.ndarray, y: bool = True): 41 | dtype_regex_list = self.y_dtype_regex_list if y else self.x_dtype_regex_list 42 | # base case 43 | if dtype_regex_list is None: 44 | return 45 | 46 | for dtype_regex_str in dtype_regex_list: 47 | m = re.compile(dtype_regex_str).match(str(arr.dtype)) 48 | if m is not None: # a match is found 49 | return 50 | raise ValueError( 51 | f"{arr.dtype} doesn't match with any regex in {dtype_regex_list} " 52 | f"for the {'y' if y else 'x'}-data" 53 | ) 54 | 55 | @staticmethod 56 | 
def _check_valid_downsample_args( 57 | *args, 58 | ) -> Tuple[Union[np.ndarray, None], np.ndarray]: 59 | if len(args) == 2: 60 | x, y = args 61 | elif len(args) == 1: 62 | x, y = None, args[0] 63 | else: 64 | raise ValueError( 65 | "downsample() takes 1 or 2 positional arguments but " 66 | f"{len(args)} were given" 67 | ) 68 | 69 | if x is not None and not isinstance(x, np.ndarray): 70 | x = np.array(x) 71 | if not isinstance(y, np.ndarray): 72 | y = np.array(y) 73 | 74 | # y must be 1D array 75 | if y.ndim != 1: 76 | raise ValueError("y must be 1D array") 77 | # x must be 1D array with same length as y or None 78 | if x is not None: 79 | if x.ndim != 1: 80 | raise ValueError("x must be 1D array") 81 | if len(x) != len(y): 82 | raise ValueError("x and y must have the same length") 83 | 84 | return x, y 85 | 86 | @staticmethod 87 | def _check_valid_n_out(n_out: int): 88 | if n_out <= 0: 89 | raise ValueError("n_out must be greater than 0") 90 | 91 | @abstractmethod 92 | def _downsample( 93 | self, x: Union[np.ndarray, None], y: np.ndarray, n_out: int, **kwargs 94 | ) -> np.ndarray: 95 | """Downsample the data in x and y. 96 | 97 | Returns 98 | ------- 99 | np.ndarray 100 | The selected indices. 101 | """ 102 | raise NotImplementedError 103 | 104 | def downsample(self, *args, n_out: int, **kwargs): # x and y are optional 105 | """Downsample y (and x). 106 | 107 | Call signatures:: 108 | downsample([x], y, n_out, **kwargs) 109 | 110 | 111 | Parameters 112 | ---------- 113 | x, y : array-like 114 | The horizontal / vertical coordinates of the data points. 115 | *x* values are optional. 116 | These parameters should be 1D arrays. 117 | These arguments cannot be passed as keywords. 118 | n_out : int 119 | The number of points to keep. 120 | **kwargs 121 | Additional keyword arguments are passed to the downsampler. 122 | 123 | Returns 124 | ------- 125 | np.ndarray 126 | The selected indices. 
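Examples
--------
Illustrative sketch (added here for clarity, not part of the original docstring)::

    from tsdownsample import MinMaxLTTBDownsampler
    y = np.random.randn(1_000_000)
    idxs = MinMaxLTTBDownsampler().downsample(y, n_out=1_000)
    y_down = y[idxs]  # the returned array contains *indices* into y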
127 | """ 128 | self._check_valid_n_out(n_out) 129 | x, y = self._check_valid_downsample_args(*args) 130 | self._supports_dtype(y, y=True) 131 | self._check_contiguous(y, y=True) 132 | if x is not None: 133 | self._supports_dtype(x, y=False) 134 | self._check_contiguous(x, y=False) 135 | return self._downsample(x, y, n_out, **kwargs) 136 | 137 | 138 | # ------------------- Rust Downsample Interface ------------------- 139 | DOWNSAMPLE_F = "downsample" 140 | 141 | 142 | # the following dtypes are supported by the rust downsamplers (x and y) 143 | _rust_dtypes = [ 144 | "float32", 145 | "float64", 146 | "uint16", 147 | "uint32", 148 | "uint64", 149 | "int16", 150 | "int32", 151 | "int64", 152 | "datetime64", 153 | "timedelta64", 154 | ] 155 | # <= 8-bit x-dtypes are not supported as the range of the values is too small to require 156 | # downsampling 157 | _y_rust_dtypes = _rust_dtypes + ["float16", "int8", "uint8", "bool"] 158 | 159 | 160 | class AbstractRustDownsampler(AbstractDownsampler, ABC): 161 | """RustDownsampler interface-class, subclassed by concrete downsamplers.""" 162 | 163 | def __init__(self): 164 | super().__init__(True, _rust_dtypes, _y_rust_dtypes) # same for x and y 165 | 166 | @property 167 | def _downsample_func_prefix(self) -> str: 168 | """The prefix of the downsample functions in the rust module.""" 169 | return DOWNSAMPLE_F 170 | 171 | @property 172 | def rust_mod(self) -> ModuleType: 173 | """The compiled Rust module for the current downsampler.""" 174 | raise NotImplementedError 175 | 176 | @property 177 | def mod_single_core(self) -> ModuleType: 178 | """Get the single-core Rust module. 179 | 180 | Returns 181 | ------- 182 | ModuleType 183 | If SIMD compiled module is available, that one is returned. Otherwise, the 184 | scalar compiled module is returned. 185 | """ 186 | return self.rust_mod.sequential 187 | 188 | @property 189 | def mod_multi_core(self) -> Union[ModuleType, None]: 190 | """Get the multi-core Rust module. 191 | 192 | Returns 193 | ------- 194 | ModuleType or None 195 | If SIMD parallel compiled module is available, that one is returned. 196 | Otherwise, the scalar parallel compiled module is returned. 197 | If no parallel compiled module is available, None is returned. 
198 | """ 199 | if hasattr(self.rust_mod, "parallel"): 200 | # use SIMD implementation if available 201 | return self.rust_mod.parallel 202 | return None # no parallel compiled module available 203 | 204 | @staticmethod 205 | def _view_x(x: np.ndarray) -> np.ndarray: 206 | """View the x-data as different dtype (if necessary).""" 207 | if np.issubdtype(x.dtype, np.datetime64): 208 | # datetime64 is viewed as int64 209 | return x.view(dtype=np.int64) 210 | elif np.issubdtype(x.dtype, np.timedelta64): 211 | # timedelta64 is viewed as int64 212 | return x.view(dtype=np.int64) 213 | return x 214 | 215 | @staticmethod 216 | def _view_y(y: np.ndarray) -> np.ndarray: 217 | """View the y-data as different dtype (if necessary).""" 218 | if y.dtype == "bool": 219 | # bool is viewed as int8 220 | return y.view(dtype=np.int8) 221 | elif np.issubdtype(y.dtype, np.datetime64): 222 | # datetime64 is viewed as int64 223 | return y.view(dtype=np.int64) 224 | elif np.issubdtype(y.dtype, np.timedelta64): 225 | # timedelta64 is viewed as int64 226 | return y.view(dtype=np.int64) 227 | return y 228 | 229 | def _switch_mod_with_y( 230 | self, y_dtype: np.dtype, mod: ModuleType, downsample_func: Optional[str] = None 231 | ) -> Callable: 232 | """Select the appropriate function from the rust module for the y-data. 233 | 234 | Assumes equal binning (when no data for x is passed -> only this function is 235 | executed). 236 | Equidistant binning is utilized when a `downsample_func` is passed from the 237 | `_switch_mod_with_x_and_y` method (since the x-data is considered in the 238 | downsampling). 239 | 240 | Parameters 241 | ---------- 242 | y_dtype : np.dtype 243 | The dtype of the y-data 244 | mod : ModuleType 245 | The module to select the appropriate function from 246 | downsample_func : str, optional 247 | The name of the function to use, by default DOWNSAMPLE_FUNC. 248 | This argument is passed from the `_switch_mod_with_x_and_y` method when 249 | the x-data is considered in the downsampling. 
250 |         """
251 |         if downsample_func is None:
252 |             downsample_func = self._downsample_func_prefix
253 |         # FLOATS
254 |         if np.issubdtype(y_dtype, np.floating):
255 |             if y_dtype == np.float16:
256 |                 return getattr(mod, downsample_func + "_f16")
257 |             elif y_dtype == np.float32:
258 |                 return getattr(mod, downsample_func + "_f32")
259 |             elif y_dtype == np.float64:
260 |                 return getattr(mod, downsample_func + "_f64")
261 |         # UINTS
262 |         elif np.issubdtype(y_dtype, np.unsignedinteger):
263 |             if y_dtype == np.uint8:
264 |                 return getattr(mod, downsample_func + "_u8")
265 |             elif y_dtype == np.uint16:
266 |                 return getattr(mod, downsample_func + "_u16")
267 |             elif y_dtype == np.uint32:
268 |                 return getattr(mod, downsample_func + "_u32")
269 |             elif y_dtype == np.uint64:
270 |                 return getattr(mod, downsample_func + "_u64")
271 |         # INTS (need to be last because uint is a subdtype of int)
272 |         elif np.issubdtype(y_dtype, np.integer):
273 |             if y_dtype == np.int8:
274 |                 return getattr(mod, downsample_func + "_i8")
275 |             elif y_dtype == np.int16:
276 |                 return getattr(mod, downsample_func + "_i16")
277 |             elif y_dtype == np.int32:
278 |                 return getattr(mod, downsample_func + "_i32")
279 |             elif y_dtype == np.int64:
280 |                 return getattr(mod, downsample_func + "_i64")
281 |         # DATETIME -> i64 (datetime64 is viewed as int64)
282 |         # TIMEDELTA -> i64 (timedelta64 is viewed as int64)
283 |         # BOOLS -> int8 (bool is viewed as int8)
284 |         raise ValueError(f"Unsupported data type (for y): {y_dtype}")
285 | 
286 |     def _switch_mod_with_x_and_y(
287 |         self,  # necessary to access the class's _switch_mod_with_y method
288 |         x_dtype: np.dtype,
289 |         y_dtype: np.dtype,
290 |         mod: ModuleType,
291 |         downsample_func: Optional[str] = None,
292 |     ) -> Callable:
293 |         """Select the appropriate function from the rust module for the x- and y-data.
294 | 
295 |         Assumes equidistant binning (the x-data determines the bin boundaries).
296 | 
297 |         Parameters
298 |         ----------
299 |         x_dtype : np.dtype
300 |             The dtype of the x-data
301 |         y_dtype : np.dtype
302 |             The dtype of the y-data
303 |         mod : ModuleType
304 |             The module to select the appropriate function from
305 |         downsample_func : str, optional
306 |             The name of the function to use, by default DOWNSAMPLE_F.
307 | """ 308 | if downsample_func is None: 309 | downsample_func = self._downsample_func_prefix 310 | # FLOATS 311 | if np.issubdtype(x_dtype, np.floating): 312 | if x_dtype == np.float16: 313 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f16") 314 | elif x_dtype == np.float32: 315 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f32") 316 | elif x_dtype == np.float64: 317 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_f64") 318 | # UINTS 319 | elif np.issubdtype(x_dtype, np.unsignedinteger): 320 | if x_dtype == np.uint16: 321 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u16") 322 | elif x_dtype == np.uint32: 323 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u32") 324 | elif x_dtype == np.uint64: 325 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_u64") 326 | # INTS (need to be last because uint is subdtype of int) 327 | elif np.issubdtype(x_dtype, np.integer): 328 | if x_dtype == np.int16: 329 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i16") 330 | elif x_dtype == np.int32: 331 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i32") 332 | elif x_dtype == np.int64: 333 | return self._switch_mod_with_y(y_dtype, mod, f"{downsample_func}_i64") 334 | # DATETIME -> i64 (datetime64 is viewed as int64) 335 | # TIMEDELTA -> i64 (timedelta64 is viewed as int64) 336 | raise ValueError(f"Unsupported data type (for x): {x_dtype}") 337 | 338 | def _downsample( 339 | self, 340 | x: Union[np.ndarray, None], 341 | y: np.ndarray, 342 | n_out: int, 343 | parallel: bool = False, 344 | **kwargs, 345 | ) -> np.ndarray: 346 | """Downsample the data in x and y.""" 347 | mod = self.mod_single_core 348 | if parallel: 349 | if self.mod_multi_core is None: 350 | name = self.__class__.__name__ 351 | warnings.warn( 352 | f"No parallel implementation available for {name}. " 353 | "Falling back to single-core implementation." 354 | ) 355 | else: 356 | mod = self.mod_multi_core 357 | ## Viewing the y-data as different dtype (if necessary) 358 | y = self._view_y(y) 359 | ## Viewing the x-data as different dtype (if necessary) 360 | if x is None: 361 | downsample_f = self._switch_mod_with_y(y.dtype, mod) 362 | return downsample_f(y, n_out, **kwargs) 363 | x = self._view_x(x) 364 | ## Getting the appropriate downsample function 365 | downsample_f = self._switch_mod_with_x_and_y(x.dtype, y.dtype, mod) 366 | return downsample_f(x, y, n_out, **kwargs) 367 | 368 | def downsample(self, *args, n_out: int, parallel: bool = False, **kwargs): 369 | """Downsample the data in x and y. 370 | 371 | The x and y arguments are positional-only arguments. If only one argument is 372 | passed, it is considered to be the y-data. If two arguments are passed, the 373 | first argument is considered to be the x-data and the second argument is 374 | considered to be the y-data. 
375 | """ 376 | return super().downsample(*args, n_out=n_out, parallel=parallel, **kwargs) 377 | 378 | def __deepcopy__(self, memo): 379 | """Deepcopy the object.""" 380 | cls = self.__class__ 381 | result = cls.__new__(cls) 382 | memo[id(self)] = result 383 | for k, v in self.__dict__.items(): 384 | if k.endswith("_mod") or k.startswith("mod_"): 385 | # Don't (deep)copy the compiled modules 386 | setattr(result, k, v) 387 | else: 388 | setattr(result, k, deepcopy(v, memo)) 389 | return result 390 | 391 | 392 | NAN_DOWNSAMPLE_F = "downsample_nan" 393 | 394 | 395 | class AbstractRustNaNDownsampler(AbstractRustDownsampler, ABC): 396 | """RustNaNDownsampler interface-class, subclassed by concrete downsamplers.""" 397 | 398 | @property 399 | def _downsample_func_prefix(self) -> str: 400 | """The prefix of the downsample functions in the rust module.""" 401 | return NAN_DOWNSAMPLE_F 402 | 403 | def _switch_mod_with_y( 404 | self, y_dtype: np.dtype, mod: ModuleType, downsample_func: Optional[str] = None 405 | ) -> Callable: 406 | """Select the appropriate function from the rust module for the y-data. 407 | 408 | Assumes equal binning (when no data for x is passed -> only this function is 409 | executed). 410 | Equidistant binning is utilized when a `downsample_func` is passed from the 411 | `_switch_mod_with_x_and_y` method (since the x-data is considered in the 412 | downsampling). 413 | 414 | Parameters 415 | ---------- 416 | y_dtype : np.dtype 417 | The dtype of the y-data 418 | mod : ModuleType 419 | The module to select the appropriate function from 420 | downsample_func : str, optional 421 | The name of the function to use, by default NAN_DOWNSAMPLE_F. 422 | This argument is passed from the `_switch_mod_with_x_and_y` method when 423 | the x-data is considered in the downsampling. 424 | """ 425 | if downsample_func is None: 426 | downsample_func = self._downsample_func_prefix 427 | if not np.issubdtype(y_dtype, np.floating): 428 | # When y is not a float, we need to remove the _nan suffix to use the 429 | # regular downsample function as the _nan suffix is only used for floats. 430 | # (Note that NaNs only exist for floats) 431 | downsample_func = downsample_func.replace("_nan", "") 432 | return super()._switch_mod_with_y(y_dtype, mod, downsample_func) 433 | -------------------------------------------------------------------------------- /downsample_rs/src/m4.rs: -------------------------------------------------------------------------------- 1 | use argminmax::{ArgMinMax, NaNArgMinMax}; 2 | use num_traits::{AsPrimitive, FromPrimitive}; 3 | use rayon::iter::IndexedParallelIterator; 4 | use rayon::prelude::*; 5 | 6 | use super::searchsorted::{ 7 | get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, 8 | }; 9 | use super::types::Num; 10 | use super::POOL; 11 | 12 | // ----------------------------------- NON-PARALLEL ------------------------------------ 13 | 14 | // ----------- WITH X 15 | 16 | macro_rules! 
m4_with_x {
17 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
18 |         pub fn $func_name<Tx, Ty>(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec<usize>
19 |         where
20 |             for<'a> &'a [Ty]: $trait,
21 |             Tx: Num + FromPrimitive + AsPrimitive<f64>,
22 |             Ty: Copy + PartialOrd,
23 |         {
24 |             assert_eq!(n_out % 4, 0);
25 |             let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4);
26 |             m4_generic_with_x(arr, bin_idx_iterator, n_out, $f_argminmax)
27 |         }
28 |     };
29 | }
30 | 
31 | m4_with_x!(m4_with_x, ArgMinMax, |arr| arr.argminmax());
32 | m4_with_x!(m4_with_x_nan, NaNArgMinMax, |arr| arr.nanargminmax());
33 | 
34 | // ----------- WITHOUT X
35 | 
36 | macro_rules! m4_without_x {
37 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
38 |         pub fn $func_name<T: Copy + PartialOrd>(arr: &[T], n_out: usize) -> Vec<usize>
39 |         where
40 |             for<'a> &'a [T]: $trait,
41 |         {
42 |             assert_eq!(n_out % 4, 0);
43 |             m4_generic(arr, n_out, $f_argminmax)
44 |         }
45 |     };
46 | }
47 | 
48 | m4_without_x!(m4_without_x, ArgMinMax, |arr| arr.argminmax());
49 | m4_without_x!(m4_without_x_nan, NaNArgMinMax, |arr| arr.nanargminmax());
50 | 
51 | // ------------------------------------- PARALLEL --------------------------------------
52 | 
53 | // ----------- WITH X
54 | 
55 | macro_rules! m4_with_x_parallel {
56 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
57 |         pub fn $func_name<Tx, Ty>(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec<usize>
58 |         where
59 |             for<'a> &'a [Ty]: $trait,
60 |             Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
61 |             Ty: Copy + PartialOrd + Send + Sync,
62 |         {
63 |             assert_eq!(n_out % 4, 0);
64 |             let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4);
65 |             m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, $f_argminmax)
66 |         }
67 |     };
68 | }
69 | 
70 | m4_with_x_parallel!(m4_with_x_parallel, ArgMinMax, |arr| arr.argminmax());
71 | m4_with_x_parallel!(m4_with_x_parallel_nan, NaNArgMinMax, |arr| arr
72 |     .nanargminmax());
73 | 
74 | // ----------- WITHOUT X
75 | 
76 | macro_rules! m4_without_x_parallel {
77 |     ($func_name:ident, $trait:path, $f_argminmax:expr) => {
78 |         pub fn $func_name<T: Copy + PartialOrd + Send + Sync>(arr: &[T], n_out: usize) -> Vec<usize>
79 |         where
80 |             for<'a> &'a [T]: $trait,
81 |         {
82 |             assert_eq!(n_out % 4, 0);
83 |             m4_generic_parallel(arr, n_out, $f_argminmax)
84 |         }
85 |     };
86 | }
87 | 
88 | m4_without_x_parallel!(m4_without_x_parallel, ArgMinMax, |arr| arr.argminmax());
89 | m4_without_x_parallel!(m4_without_x_parallel_nan, NaNArgMinMax, |arr| arr
90 |     .nanargminmax());
91 | 
92 | // TODO: check for duplicate data in the output array
93 | //  -> In the current implementation we always add 4 datapoints per bin (if of
94 | //     course the bin has >= 4 datapoints). However, the argmin and argmax might
95 | //     be the start and end of the bin, which would result in duplicate data in
96 | //     the output array. (this is for example the case for monotonic data).
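// Editor's note (illustrative, not part of the original source file): for monotonically
// increasing data the duplicates described above are easy to see. With n_out = 12 on an
// array of length 100 (3 bins of 4 samples each), every bin's argmin coincides with its
// first index and its argmax with its last index, so the sampled indices become
// [0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99] -- exactly the duplicated indices
// asserted in the tests at the bottom of this file.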
97 | 
98 | // ----------------------------------- GENERICS ------------------------------------
99 | 
100 | // --------------------- WITHOUT X
101 | 
102 | #[inline(always)]
103 | pub(crate) fn m4_generic<T: Copy>(
104 |     arr: &[T],
105 |     n_out: usize,
106 |     f_argminmax: fn(&[T]) -> (usize, usize),
107 | ) -> Vec<usize> {
108 |     // Assumes n_out is a multiple of 4
109 |     if n_out >= arr.len() {
110 |         return (0..arr.len()).collect();
111 |     }
112 | 
113 |     // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
114 |     let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
115 | 
116 |     let mut sampled_indices: Vec<usize> = vec![usize::default(); n_out];
117 | 
118 |     let mut start_idx: usize = 0;
119 |     for i in 0..n_out / 4 {
120 |         // Decided to use multiplication instead of adding to the accumulator (end)
121 |         // as multiplication seems to be less prone to rounding errors.
122 |         let end: f64 = block_size * (i + 1) as f64;
123 |         let end_idx: usize = end as usize + 1;
124 | 
125 |         let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
126 | 
127 |         // Add the indexes in sorted order
128 |         sampled_indices[4 * i] = start_idx;
129 |         if min_index < max_index {
130 |             sampled_indices[4 * i + 1] = min_index + start_idx;
131 |             sampled_indices[4 * i + 2] = max_index + start_idx;
132 |         } else {
133 |             sampled_indices[4 * i + 1] = max_index + start_idx;
134 |             sampled_indices[4 * i + 2] = min_index + start_idx;
135 |         }
136 |         sampled_indices[4 * i + 3] = end_idx - 1;
137 | 
138 |         start_idx = end_idx;
139 |     }
140 | 
141 |     sampled_indices
142 | }
143 | 
144 | #[inline(always)]
145 | pub(crate) fn m4_generic_parallel<T: Copy + Send + Sync>(
146 |     arr: &[T],
147 |     n_out: usize,
148 |     f_argminmax: fn(&[T]) -> (usize, usize),
149 | ) -> Vec<usize> {
150 |     // Assumes n_out is a multiple of 4
151 |     if n_out >= arr.len() {
152 |         return (0..arr.len()).collect::<Vec<usize>>();
153 |     }
154 | 
155 |     // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
156 |     let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
157 | 
158 |     // Store the enumerated indexes in the output array
159 |     // These indexes are used to calculate the start and end indexes of each bin in
160 |     // the multi-threaded execution
161 |     let mut sampled_indices: Vec<usize> = (0..n_out).collect::<Vec<usize>>();
162 | 
163 |     POOL.install(|| {
164 |         sampled_indices
165 |             .par_chunks_exact_mut(4)
166 |             .for_each(|sampled_index_chunk| {
167 |                 let i: f64 = unsafe { *sampled_index_chunk.get_unchecked(0) >> 2 } as f64;
168 |                 let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize;
169 |                 let end_idx: usize = (block_size * (i + 1.0)) as usize + 1;
170 | 
171 |                 let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
172 | 
173 |                 sampled_index_chunk[0] = start_idx;
174 |                 // Add the indexes in sorted order
175 |                 if min_index < max_index {
176 |                     sampled_index_chunk[1] = min_index + start_idx;
177 |                     sampled_index_chunk[2] = max_index + start_idx;
178 |                 } else {
179 |                     sampled_index_chunk[1] = max_index + start_idx;
180 |                     sampled_index_chunk[2] = min_index + start_idx;
181 |                 }
182 |                 sampled_index_chunk[3] = end_idx - 1;
183 |             })
184 |     });
185 | 
186 |     sampled_indices
187 | }
188 | 
189 | // --------------------- WITH X
190 | 
191 | #[inline(always)]
192 | pub(crate) fn m4_generic_with_x<T: Copy>(
193 |     arr: &[T],
194 |     bin_idx_iterator: impl Iterator<Item = Option<(usize, usize)>>,
195 |     n_out: usize,
196 |     f_argminmax: fn(&[T]) -> (usize, usize),
197 | ) -> Vec<usize> {
198 |     // Assumes n_out is a multiple of 4
199 |     if n_out >= arr.len() {
200 |         return (0..arr.len()).collect::<Vec<usize>>();
201 |     }
202 | 
203 |     let mut sampled_indices: Vec<usize> = Vec::with_capacity(n_out);
204 | 
205 |     bin_idx_iterator.for_each(|bin| {
206 |         if let Some((start, end)) = bin {
207 |             if end <= start + 4 {
208 |                 // If the bin has <= 4 elements, just add them all
209 |                 for i in start..end {
210 |                     sampled_indices.push(i);
211 |                 }
212 |             } else {
213 |                 // If the bin has > 4 elements, add the first and last + argmin and argmax
214 |                 let step = &arr[start..end];
215 |                 let (min_index, max_index) = f_argminmax(step);
216 | 
217 |                 sampled_indices.push(start);
218 | 
219 |                 // Add the indexes in sorted order
220 |                 if min_index < max_index {
221 |                     sampled_indices.push(min_index + start);
222 |                     sampled_indices.push(max_index + start);
223 |                 } else {
224 |                     sampled_indices.push(max_index + start);
225 |                     sampled_indices.push(min_index + start);
226 |                 }
227 | 
228 |                 sampled_indices.push(end - 1);
229 |             }
230 |         }
231 |     });
232 | 
233 |     sampled_indices
234 | }
235 | 
236 | #[inline(always)]
237 | pub(crate) fn m4_generic_with_x_parallel<T: Copy + Send + Sync>(
238 |     arr: &[T],
239 |     bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>>>,
240 |     n_out: usize,
241 |     f_argminmax: fn(&[T]) -> (usize, usize),
242 | ) -> Vec<usize> {
243 |     // Assumes n_out is a multiple of 4
244 |     if n_out >= arr.len() {
245 |         return (0..arr.len()).collect::<Vec<usize>>();
246 |     }
247 | 
248 |     POOL.install(|| {
249 |         bin_idx_iterator
250 |             .flat_map(|bin_idx_iterator| {
251 |                 bin_idx_iterator
252 |                     .map(|bin| {
253 |                         match bin {
254 |                             Some((start, end)) => {
255 |                                 if end <= start + 4 {
256 |                                     // If the bin has <= 4 elements, just return them all
257 |                                     return (start..end).collect::<Vec<usize>>();
258 |                                 }
259 | 
260 |                                 // If the bin has > 4 elements, return the first and last + argmin and argmax
261 |                                 let step = &arr[start..end];
262 |                                 let (min_index, max_index) = f_argminmax(step);
263 | 
264 |                                 // Return the indexes in sorted order
265 |                                 let mut sampled_index = vec![start, 0, 0, end - 1];
266 |                                 if min_index < max_index {
267 |                                     sampled_index[1] = min_index + start;
268 |                                     sampled_index[2] = max_index + start;
269 |                                 } else {
270 |                                     sampled_index[1] = max_index + start;
271 |                                     sampled_index[2] = min_index + start;
272 |                                 }
273 |                                 sampled_index
274 |                             } // If the bin is empty, return empty Vec
275 |                             None => {
276 |                                 vec![]
277 |                             }
278 |                         }
279 |                     })
280 |                     .collect::<Vec<Vec<usize>>>()
281 |             })
282 |             .flatten()
283 |             .collect::<Vec<usize>>()
284 |     })
285 | }
286 | 
287 | #[cfg(test)]
288 | mod tests {
289 |     use num_traits::AsPrimitive;
290 |     use rstest::rstest;
291 |     use rstest_reuse::{self, *};
292 | 
293 |     use super::{m4_with_x, m4_without_x};
294 |     use super::{m4_with_x_parallel, m4_without_x_parallel};
295 | 
296 |     use dev_utils::utils;
297 | 
298 |     fn get_array_f32(n: usize) -> Vec<f32> {
299 |         utils::get_random_array(n, f32::MIN, f32::MAX)
300 |     }
301 | 
302 |     // Template for n_out
303 |     #[template]
304 |     #[rstest]
305 |     #[case(196)]
306 |     #[case(200)]
307 |     #[case(204)]
308 |     fn n_outs(#[case] n_out: usize) {}
309 | 
310 |     #[test]
311 |     fn test_m4_scalar_without_x_correct() {
312 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
313 | 
314 |         let sampled_indices = m4_without_x(&arr, 12);
315 |         let sampled_values = sampled_indices
316 |             .iter()
317 |             .map(|x| arr[*x])
318 |             .collect::<Vec<f32>>();
319 | 
320 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
321 |         let expected_values = expected_indices
322 |             .iter()
323 |             .map(|x| *x as f32)
324 |             .collect::<Vec<f32>>();
325 | 
326 |         assert_eq!(sampled_indices, expected_indices);
327 |         assert_eq!(sampled_values, expected_values);
328 |     }
329 | 
330 |     #[test]
331 |     fn test_m4_scalar_without_x_parallel_correct() {
332 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
333 | 
334 |         let sampled_indices = m4_without_x_parallel(&arr, 12);
335 |         let sampled_values = sampled_indices
336 |             .iter()
337 |             .map(|x| arr[*x])
338 |             .collect::<Vec<f32>>();
339 | 
340 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
341 |         let expected_values = expected_indices
342 |             .iter()
343 |             .map(|x| *x as f32)
344 |             .collect::<Vec<f32>>();
345 | 
346 |         assert_eq!(sampled_indices, expected_indices);
347 |         assert_eq!(sampled_values, expected_values);
348 |     }
349 | 
350 |     #[test]
351 |     fn test_m4_scalar_with_x_correct() {
352 |         let x: [i32; 100] = core::array::from_fn(|i| i.as_());
353 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
354 | 
355 |         let sampled_indices = m4_with_x(&x, &arr, 12);
356 |         let sampled_values = sampled_indices
357 |             .iter()
358 |             .map(|x| arr[*x])
359 |             .collect::<Vec<f32>>();
360 | 
361 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
362 |         let expected_values = expected_indices
363 |             .iter()
364 |             .map(|x| *x as f32)
365 |             .collect::<Vec<f32>>();
366 | 
367 |         assert_eq!(sampled_indices, expected_indices);
368 |         assert_eq!(sampled_values, expected_values);
369 |     }
370 | 
371 |     #[test]
372 |     fn test_m4_scalar_with_x_parallel_correct() {
373 |         let x: [i32; 100] = core::array::from_fn(|i| i.as_());
374 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
375 | 
376 |         let sampled_indices = m4_with_x_parallel(&x, &arr, 12);
377 |         let sampled_values = sampled_indices
378 |             .iter()
379 |             .map(|x| arr[*x])
380 |             .collect::<Vec<f32>>();
381 | 
382 |         let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
383 |         let expected_values = expected_indices
384 |             .iter()
385 |             .map(|x| *x as f32)
386 |             .collect::<Vec<f32>>();
387 | 
388 |         assert_eq!(sampled_indices, expected_indices);
389 |         assert_eq!(sampled_values, expected_values);
390 |     }
391 | 
392 |     #[test]
393 |     fn test_m4_scalar_with_x_gap() {
394 |         // We will create a gap in the middle of the array
395 |         // Increment the second half of the array by 50
396 |         let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
397 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
398 | 
399 |         let sampled_indices = m4_with_x(&x, &arr, 20);
400 |         assert_eq!(sampled_indices.len(), 16); // One full gap
401 |         let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
402 |         assert_eq!(sampled_indices, expected_indices);
403 | 
404 |         // Increment the second half of the array by 50 again
405 |         let x = x.map(|x| if x > 101 { x + 50 } else { x });
406 | 
407 |         let sampled_indices = m4_with_x(&x, &arr, 20);
408 |         assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
409 |         let expected_indices = vec![
410 |             0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
411 |         ];
412 |         assert_eq!(sampled_indices, expected_indices);
413 |     }
414 | 
415 |     #[test]
416 |     fn test_m4_scalar_with_x_gap_parallel() {
417 |         // We will create a gap in the middle of the array
418 |         // Increment the second half of the array by 50
419 |         let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
420 |         let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
421 | 
422 |         let sampled_indices = m4_with_x_parallel(&x, &arr, 20);
423 |         assert_eq!(sampled_indices.len(), 16); // One full gap
424 |         let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
425 |         assert_eq!(sampled_indices, expected_indices);
426 | 
427 |         // Increment the second half of the array by 50 again
428 |         let x = x.map(|x| if x > 101 { x + 50 } else { x });
429 | 
430 |         let sampled_indices = m4_with_x_parallel(&x, &arr, 20);
431 |         assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
432 |         let expected_indices = vec![
433 |             0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
434 |         ];
435 |         assert_eq!(sampled_indices, expected_indices);
436 |     }
437 | 
438 |     #[apply(n_outs)]
439 |     fn test_many_random_runs_correct(n_out: usize) {
440 |         const N: usize = 20_003;
441 |         let x: [i32; N] = core::array::from_fn(|i| i.as_());
442 |         for _ in 0..100 {
443 |             let arr = get_array_f32(N);
444 |             let idxs1 = m4_without_x(arr.as_slice(), n_out);
445 |             let idxs2 = m4_with_x(&x, arr.as_slice(), n_out);
446 |             assert_eq!(idxs1, idxs2);
447 |             let idxs3 = m4_without_x_parallel(arr.as_slice(), n_out);
448 |             let idxs4 = m4_with_x_parallel(&x, arr.as_slice(), n_out);
449 |             assert_eq!(idxs1, idxs3);
450 |             // TODO: check whether this still fails after fixing the sequential_add_mul
451 |             assert_eq!(idxs1, idxs4); // TODO: this fails when nb. of threads = 16
452 |         }
453 |     }
454 | }
455 | 
--------------------------------------------------------------------------------
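Editor's addendum (usage sketch, not one of the repository files): the
`AbstractRustDownsampler.downsample` docstring above describes a positional x/y calling
convention with a required `n_out` keyword and an optional `parallel` flag. The snippet
below illustrates that call pattern with one of the package's concrete downsamplers
(`MinMaxLTTBDownsampler`); the array sizes and variable names are arbitrary examples.

import numpy as np

from tsdownsample import MinMaxLTTBDownsampler

y = np.random.randn(1_000_000).astype(np.float32)
x = np.arange(y.size, dtype=np.int64)

# y-only call: the data is divided into equally sized bins over the index
idx = MinMaxLTTBDownsampler().downsample(y, n_out=1_000)

# x + y call: equidistant bins are derived from the (monotonic) x-values;
# parallel=True uses the multi-core compiled module when it is available
idx = MinMaxLTTBDownsampler().downsample(x, y, n_out=1_000, parallel=True)

# The returned array holds the selected indices, which can be used to index the
# original data, e.g. for plotting
y_downsampled = y[idx]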